# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
# Import Dataset
# Import File from Local Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.util import ngrams
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
# IPython/Jupyter magic (not plain Python): draw matplotlib figures inline in the cell output.
%matplotlib inline
# Suppress every Python warning from this point on.
warnings.filterwarnings("ignore")
# Download the complete NLTK data collection ('all').
# NOTE(review): the imports above only obviously rely on the stopwords and tokenizer data — confirm whether the full collection is really needed.
nltk.download('all')
[nltk_data] Downloading collection 'all' [nltk_data] | [nltk_data] | Downloading package abc to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package abc is already up-to-date! [nltk_data] | Downloading package alpino to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package alpino is already up-to-date! [nltk_data] | Downloading package biocreative_ppi to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package biocreative_ppi is already up-to-date! [nltk_data] | Downloading package brown to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package brown is already up-to-date! [nltk_data] | Downloading package brown_tei to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package brown_tei is already up-to-date! [nltk_data] | Downloading package cess_cat to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package cess_cat is already up-to-date! [nltk_data] | Downloading package cess_esp to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package cess_esp is already up-to-date! [nltk_data] | Downloading package chat80 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package chat80 is already up-to-date! [nltk_data] | Downloading package city_database to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package city_database is already up-to-date! [nltk_data] | Downloading package cmudict to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package cmudict is already up-to-date! [nltk_data] | Downloading package comparative_sentences to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package comparative_sentences is already up-to- [nltk_data] | date! [nltk_data] | Downloading package comtrans to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package comtrans is already up-to-date! 
[nltk_data] | Downloading package conll2000 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package conll2000 is already up-to-date! [nltk_data] | Downloading package conll2002 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package conll2002 is already up-to-date! [nltk_data] | Downloading package conll2007 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package conll2007 is already up-to-date! [nltk_data] | Downloading package crubadan to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package crubadan is already up-to-date! [nltk_data] | Downloading package dependency_treebank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package dependency_treebank is already up-to-date! [nltk_data] | Downloading package dolch to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package dolch is already up-to-date! [nltk_data] | Downloading package europarl_raw to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package europarl_raw is already up-to-date! [nltk_data] | Downloading package floresta to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package floresta is already up-to-date! [nltk_data] | Downloading package framenet_v15 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package framenet_v15 is already up-to-date! [nltk_data] | Downloading package framenet_v17 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package framenet_v17 is already up-to-date! [nltk_data] | Downloading package gazetteers to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package gazetteers is already up-to-date! [nltk_data] | Downloading package genesis to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package genesis is already up-to-date! 
[nltk_data] | Downloading package gutenberg to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package gutenberg is already up-to-date! [nltk_data] | Downloading package ieer to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ieer is already up-to-date! [nltk_data] | Downloading package inaugural to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package inaugural is already up-to-date! [nltk_data] | Downloading package indian to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package indian is already up-to-date! [nltk_data] | Downloading package jeita to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package jeita is already up-to-date! [nltk_data] | Downloading package kimmo to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package kimmo is already up-to-date! [nltk_data] | Downloading package knbc to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package knbc is already up-to-date! [nltk_data] | Downloading package lin_thesaurus to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package lin_thesaurus is already up-to-date! [nltk_data] | Downloading package mac_morpho to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package mac_morpho is already up-to-date! [nltk_data] | Downloading package machado to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package machado is already up-to-date! [nltk_data] | Downloading package masc_tagged to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package masc_tagged is already up-to-date! [nltk_data] | Downloading package moses_sample to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package moses_sample is already up-to-date! 
[nltk_data] | Downloading package movie_reviews to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package movie_reviews is already up-to-date! [nltk_data] | Downloading package names to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package names is already up-to-date! [nltk_data] | Downloading package nombank.1.0 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package nombank.1.0 is already up-to-date! [nltk_data] | Downloading package nps_chat to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package nps_chat is already up-to-date! [nltk_data] | Downloading package omw to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package omw is already up-to-date! [nltk_data] | Downloading package opinion_lexicon to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package opinion_lexicon is already up-to-date! [nltk_data] | Downloading package paradigms to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package paradigms is already up-to-date! [nltk_data] | Downloading package pil to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package pil is already up-to-date! [nltk_data] | Downloading package pl196x to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package pl196x is already up-to-date! [nltk_data] | Downloading package ppattach to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ppattach is already up-to-date! [nltk_data] | Downloading package problem_reports to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package problem_reports is already up-to-date! [nltk_data] | Downloading package propbank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package propbank is already up-to-date! 
[nltk_data] | Downloading package ptb to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ptb is already up-to-date! [nltk_data] | Downloading package product_reviews_1 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package product_reviews_1 is already up-to-date! [nltk_data] | Downloading package product_reviews_2 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package product_reviews_2 is already up-to-date! [nltk_data] | Downloading package pros_cons to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package pros_cons is already up-to-date! [nltk_data] | Downloading package qc to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package qc is already up-to-date! [nltk_data] | Downloading package reuters to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package reuters is already up-to-date! [nltk_data] | Downloading package rte to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package rte is already up-to-date! [nltk_data] | Downloading package semcor to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package semcor is already up-to-date! [nltk_data] | Downloading package senseval to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package senseval is already up-to-date! [nltk_data] | Downloading package sentiwordnet to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sentiwordnet is already up-to-date! [nltk_data] | Downloading package sentence_polarity to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sentence_polarity is already up-to-date! [nltk_data] | Downloading package shakespeare to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package shakespeare is already up-to-date! 
[nltk_data] | Downloading package sinica_treebank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sinica_treebank is already up-to-date! [nltk_data] | Downloading package smultron to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package smultron is already up-to-date! [nltk_data] | Downloading package state_union to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package state_union is already up-to-date! [nltk_data] | Downloading package stopwords to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package stopwords is already up-to-date! [nltk_data] | Downloading package subjectivity to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package subjectivity is already up-to-date! [nltk_data] | Downloading package swadesh to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package swadesh is already up-to-date! [nltk_data] | Downloading package switchboard to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package switchboard is already up-to-date! [nltk_data] | Downloading package timit to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package timit is already up-to-date! [nltk_data] | Downloading package toolbox to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package toolbox is already up-to-date! [nltk_data] | Downloading package treebank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package treebank is already up-to-date! [nltk_data] | Downloading package twitter_samples to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package twitter_samples is already up-to-date! [nltk_data] | Downloading package udhr to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package udhr is already up-to-date! 
[nltk_data] | Downloading package udhr2 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package udhr2 is already up-to-date! [nltk_data] | Downloading package unicode_samples to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package unicode_samples is already up-to-date! [nltk_data] | Downloading package universal_treebanks_v20 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package universal_treebanks_v20 is already up-to- [nltk_data] | date! [nltk_data] | Downloading package verbnet to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package verbnet is already up-to-date! [nltk_data] | Downloading package verbnet3 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package verbnet3 is already up-to-date! [nltk_data] | Downloading package webtext to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package webtext is already up-to-date! [nltk_data] | Downloading package wordnet to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package wordnet is already up-to-date! [nltk_data] | Downloading package wordnet_ic to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package wordnet_ic is already up-to-date! [nltk_data] | Downloading package words to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package words is already up-to-date! [nltk_data] | Downloading package ycoe to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ycoe is already up-to-date! [nltk_data] | Downloading package rslp to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package rslp is already up-to-date! [nltk_data] | Downloading package maxent_treebank_pos_tagger to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package maxent_treebank_pos_tagger is already up- [nltk_data] | to-date! 
[nltk_data] | Downloading package universal_tagset to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package universal_tagset is already up-to-date! [nltk_data] | Downloading package maxent_ne_chunker to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package maxent_ne_chunker is already up-to-date! [nltk_data] | Downloading package punkt to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package punkt is already up-to-date! [nltk_data] | Downloading package book_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package book_grammars is already up-to-date! [nltk_data] | Downloading package sample_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sample_grammars is already up-to-date! [nltk_data] | Downloading package spanish_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package spanish_grammars is already up-to-date! [nltk_data] | Downloading package basque_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package basque_grammars is already up-to-date! [nltk_data] | Downloading package large_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package large_grammars is already up-to-date! [nltk_data] | Downloading package tagsets to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package tagsets is already up-to-date! [nltk_data] | Downloading package snowball_data to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package snowball_data is already up-to-date! [nltk_data] | Downloading package bllip_wsj_no_aux to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package bllip_wsj_no_aux is already up-to-date! [nltk_data] | Downloading package word2vec_sample to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... 
[nltk_data] | Package word2vec_sample is already up-to-date! [nltk_data] | Downloading package panlex_swadesh to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package panlex_swadesh is already up-to-date! [nltk_data] | Downloading package mte_teip5 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package mte_teip5 is already up-to-date! [nltk_data] | Downloading package averaged_perceptron_tagger to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package averaged_perceptron_tagger is already up- [nltk_data] | to-date! [nltk_data] | Downloading package averaged_perceptron_tagger_ru to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package averaged_perceptron_tagger_ru is already [nltk_data] | up-to-date! [nltk_data] | Downloading package perluniprops to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package perluniprops is already up-to-date! [nltk_data] | Downloading package nonbreaking_prefixes to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package nonbreaking_prefixes is already up-to-date! [nltk_data] | Downloading package vader_lexicon to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package vader_lexicon is already up-to-date! [nltk_data] | Downloading package porter_test to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package porter_test is already up-to-date! [nltk_data] | Downloading package wmt15_eval to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package wmt15_eval is already up-to-date! [nltk_data] | Downloading package mwa_ppdb to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package mwa_ppdb is already up-to-date! [nltk_data] | [nltk_data] Done downloading collection all
True
# Folder that holds the raw CSV files.
# Google Colab alternative:
# path = '/content/drive/MyDrive/Files/'
path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'

# Read the movie catalogue and preview its first rows.
movies_csv = f'{path}ottmovies.csv'
df_movies = pd.read_csv(movies_csv)
df_movies.head()
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Inception | 2010 | 13+ | 8.8 | 87% | Christopher Nolan | Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... | Action,Adventure,Sci-Fi,Thriller | United States,United Kingdom | English,Japanese,French | Dom Cobb is a skilled thief, the absolute best... | 148.0 | movie | NaN | 1 | 0 | 0 | 0 | 0 |
| 1 | 2 | The Matrix | 1999 | 16+ | 8.7 | 88% | Lana Wachowski,Lilly Wachowski | Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... | Action,Sci-Fi | United States | English | Thomas A. Anderson is a man living two lives. ... | 136.0 | movie | NaN | 1 | 0 | 0 | 0 | 0 |
| 2 | 3 | Avengers: Infinity War | 2018 | 13+ | 8.4 | 85% | Anthony Russo,Joe Russo | Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... | Action,Adventure,Sci-Fi | United States | English | As the Avengers and their allies have continue... | 149.0 | movie | NaN | 1 | 0 | 0 | 0 | 0 |
| 3 | 4 | Back to the Future | 1985 | 7+ | 8.5 | 96% | Robert Zemeckis | Michael J. Fox,Christopher Lloyd,Lea Thompson,... | Adventure,Comedy,Sci-Fi | United States | English | Marty McFly, a typical American teenager of th... | 116.0 | movie | NaN | 1 | 0 | 0 | 0 | 0 |
| 4 | 5 | The Good, the Bad and the Ugly | 1966 | 16+ | 8.8 | 97% | Sergio Leone | Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... | Western | Italy,Spain,West Germany,United States | Italian | Blondie (The Good) (Clint Eastwood) is a profe... | 161.0 | movie | NaN | 1 | 0 | 1 | 0 | 0 |
# profile = ProfileReport(df_movies)
# profile
def data_investigate(df):
    """Print a structural overview of ``df`` and plot its missing values.

    Prints, in order: the row and column counts, the column names, the
    column dtypes, the per-column count of missing values (only columns
    that have any) and the per-column percentage of missing values. It
    then shows a heatmap of ``df.isnull()``.

    Returns None; everything is reported through print and matplotlib.
    """
    divider = '**' * 25
    row_count, column_count = df.shape

    # The null mask is reused by the counts, the percentages and the plot.
    null_mask = df.isnull()
    null_counts = null_mask.sum()

    print('No of Rows : ', row_count)
    print('No of Coloums : ', column_count)
    print(divider)

    print('Colums Names : \n', df.columns)
    print(divider)

    print('Datatype of Columns : \n', df.dtypes)
    print(divider)

    print('Missing Values : ')
    print(null_counts[null_counts > 0])
    print(divider)

    print('Missing vaules %age wise :\n')
    print(100 * (null_counts / len(df.index)))
    print(divider)

    print('Pictorial Representation : ')
    plt.figure(figsize=(10, 10))
    sns.heatmap(null_mask, yticklabels=False, cbar=False)
    plt.show()


data_investigate(df_movies)
No of Rows : 16923
No of Coloums : 20
**************************************************
Colums Names :
Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
dtype='object')
**************************************************
Datatype of Columns :
ID int64
Title object
Year int64
Age object
IMDb float64
Rotten Tomatoes object
Directors object
Cast object
Genres object
Country object
Language object
Plotline object
Runtime float64
Kind object
Seasons float64
Netflix int64
Hulu int64
Prime Video int64
Disney+ int64
Type int64
dtype: object
**************************************************
Missing Values :
Age 8457
IMDb 328
Rotten Tomatoes 10437
Directors 357
Cast 648
Genres 234
Country 303
Language 437
Plotline 4958
Runtime 382
Seasons 16923
dtype: int64
**************************************************
Missing vaules %age wise :
ID 0.000000
Title 0.000000
Year 0.000000
Age 49.973409
IMDb 1.938191
Rotten Tomatoes 61.673462
Directors 2.109555
Cast 3.829108
Genres 1.382734
Country 1.790463
Language 2.582284
Plotline 29.297406
Runtime 2.257283
Kind 0.000000
Seasons 100.000000
Netflix 0.000000
Hulu 0.000000
Prime Video 0.000000
Disney+ 0.000000
Type 0.000000
dtype: float64
**************************************************
Pictorial Representation :
# ---------------------------------------------------------------------------
# Cleaning of df_movies.
# Missing values are replaced by placeholder strings rather than by
# mean/median estimates, so numeric columns that had gaps (IMDb, Rotten
# Tomatoes, Runtime) end up with object dtype. Later cells filter out the
# "NA" rows before casting back to numbers.
# ---------------------------------------------------------------------------

# ID
# The ID column is kept unchanged.

# Age
# Titles on Disney+ that have no age rating are labelled '13'. Each
# comparison is parenthesised because `&` binds tighter than `==`; the
# unparenthesised form only worked because 'Disney+' holds 0/1 flags.
missing_age_on_disney = df_movies['Age'].isnull() & (df_movies['Disney+'] == 1)
df_movies.loc[missing_age_on_disney, 'Age'] = '13'

# Every other missing rating becomes 'NR', then the rating labels are reduced
# to bare numbers (still stored as strings). The column is reassigned instead
# of calling replace(..., inplace=True) on df_movies['Age']: an inplace call
# on a selected column can act on a temporary object and leave the frame
# untouched when pandas Copy-on-Write is active.
df_movies['Age'] = df_movies['Age'].fillna('NR').replace(
    {'all': '0', '7+': '7', '13+': '13', '16+': '16', '18+': '18'}
)

# Rotten Tomatoes
# Strip the '%' sign from the scores that are present and convert them to
# integers. Rows without a score are not part of the right-hand side, so
# index alignment leaves them missing until the fill below.
has_rt_score = df_movies['Rotten Tomatoes'].notnull()
df_movies['Rotten Tomatoes'] = (
    df_movies.loc[has_rt_score, 'Rotten Tomatoes']
    .str.replace('%', '', regex=False)
    .astype(int)
)

# IMDb, Rotten Tomatoes, Directors, Cast, Genres, Country, Language,
# Plotline, Runtime
# All remaining gaps in these columns are filled with the string "NA".
na_filled_columns = [
    'IMDb',
    'Rotten Tomatoes',
    'Directors',
    'Cast',
    'Genres',
    'Country',
    'Language',
    'Plotline',
    'Runtime',
]
df_movies.fillna({column: "NA" for column in na_filled_columns}, inplace=True)

# Kind, Type
# Left untouched (no missing values were reported for them).

# Seasons
# Dropped: the column is missing for every row of the movie dataset.
df_movies = df_movies.drop(columns=['Seasons'])

# Service Provider
# idxmax returns the first column holding the row maximum, so a title that
# is on several platforms is attributed to the earliest one in this list,
# and a title flagged on none of them is attributed to 'Netflix'.
platform_flags = ['Netflix', 'Prime Video', 'Disney+', 'Hulu']
df_movies['Service Provider'] = df_movies[platform_flags].idxmax(axis=1)

# Removing Duplicate and Missing Entries
df_movies.dropna(how='any', inplace=True)
df_movies.drop_duplicates(inplace=True)

data_investigate(df_movies)
No of Rows : 16923
No of Coloums : 20
**************************************************
Colums Names :
Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
'Service Provider'],
dtype='object')
**************************************************
Datatype of Columns :
ID int64
Title object
Year int64
Age object
IMDb object
Rotten Tomatoes object
Directors object
Cast object
Genres object
Country object
Language object
Plotline object
Runtime object
Kind object
Netflix int64
Hulu int64
Prime Video int64
Disney+ int64
Type int64
Service Provider object
dtype: object
**************************************************
Missing Values :
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :
ID 0.0
Title 0.0
Year 0.0
Age 0.0
IMDb 0.0
Rotten Tomatoes 0.0
Directors 0.0
Cast 0.0
Genres 0.0
Country 0.0
Language 0.0
Plotline 0.0
Runtime 0.0
Kind 0.0
Netflix 0.0
Hulu 0.0
Prime Video 0.0
Disney+ 0.0
Type 0.0
Service Provider 0.0
dtype: float64
**************************************************
Pictorial Representation :
# Preview the first rows of the cleaned frame.
df_movies.head()
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Inception | 2010 | 13 | 8.8 | 87 | Christopher Nolan | Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... | Action,Adventure,Sci-Fi,Thriller | United States,United Kingdom | English,Japanese,French | Dom Cobb is a skilled thief, the absolute best... | 148 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 1 | 2 | The Matrix | 1999 | 16 | 8.7 | 88 | Lana Wachowski,Lilly Wachowski | Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... | Action,Sci-Fi | United States | English | Thomas A. Anderson is a man living two lives. ... | 136 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 2 | 3 | Avengers: Infinity War | 2018 | 13 | 8.4 | 85 | Anthony Russo,Joe Russo | Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... | Action,Adventure,Sci-Fi | United States | English | As the Avengers and their allies have continue... | 149 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 3 | 4 | Back to the Future | 1985 | 7 | 8.5 | 96 | Robert Zemeckis | Michael J. Fox,Christopher Lloyd,Lea Thompson,... | Adventure,Comedy,Sci-Fi | United States | English | Marty McFly, a typical American teenager of th... | 116 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 4 | 5 | The Good, the Bad and the Ugly | 1966 | 16 | 8.8 | 97 | Sergio Leone | Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... | Western | Italy,Spain,West Germany,United States | Italian | Blondie (The Good) (Clint Eastwood) is a profe... | 161 | movie | 1 | 0 | 1 | 0 | 0 | Netflix |
# Summary statistics of the numeric columns. IMDb, Rotten Tomatoes and
# Runtime do not appear in the result: they were filled with the string "NA"
# during cleaning and are therefore object dtype.
df_movies.describe()
| ID | Year | Netflix | Hulu | Prime Video | Disney+ | Type | |
|---|---|---|---|---|---|---|---|
| count | 16923.000000 | 16923.000000 | 16923.000000 | 16923.000000 | 16923.000000 | 16923.000000 | 16923.0 |
| mean | 8462.000000 | 2003.211901 | 0.214915 | 0.062637 | 0.727235 | 0.033150 | 0.0 |
| std | 4885.393638 | 20.526532 | 0.410775 | 0.242315 | 0.445394 | 0.179034 | 0.0 |
| min | 1.000000 | 1901.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 |
| 25% | 4231.500000 | 2001.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 |
| 50% | 8462.000000 | 2012.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.0 |
| 75% | 12692.500000 | 2016.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.0 |
| max | 16923.000000 | 2020.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.0 |
# Pairwise correlation of the numeric columns.
# The frame also holds text columns; they are excluded explicitly because
# newer pandas releases raise on non-numeric data instead of silently
# skipping it. 'Type' is constant, so its correlations are NaN.
df_movies.select_dtypes(include=['number', 'bool']).corr()
| ID | Year | Netflix | Hulu | Prime Video | Disney+ | Type | |
|---|---|---|---|---|---|---|---|
| ID | 1.000000 | -0.217816 | -0.644470 | -0.129926 | 0.469301 | 0.263530 | NaN |
| Year | -0.217816 | 1.000000 | 0.256151 | 0.101337 | -0.255578 | -0.047258 | NaN |
| Netflix | -0.644470 | 0.256151 | 1.000000 | -0.118032 | -0.745141 | -0.089649 | NaN |
| Hulu | -0.129926 | 0.101337 | -0.118032 | 1.000000 | -0.284654 | -0.039693 | NaN |
| Prime Video | 0.469301 | -0.255578 | -0.745141 | -0.284654 | 1.000000 | -0.289008 | NaN |
| Disney+ | 0.263530 | -0.047258 | -0.089649 | -0.039693 | -0.289008 | 1.000000 | NaN |
| Type | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# df_movies.sort_values('Year', ascending = True)
# df_movies.sort_values('IMDb', ascending = False)
# df_movies.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_ottmovies.csv', index = False)
# path = '/content/drive/MyDrive/Files/'
# udf_movies = pd.read_csv(path + 'updated_ottmovies.csv')
# udf_movies
# df_netflix_movies = df_movies.loc[(df_movies['Netflix'] > 0)]
# df_hulu_movies = df_movies.loc[(df_movies['Hulu'] > 0)]
# df_prime_video_movies = df_movies.loc[(df_movies['Prime Video'] > 0)]
# df_disney_movies = df_movies.loc[(df_movies['Disney+'] > 0)]
# Frames holding the titles that are available on exactly one platform.
_PLATFORM_FLAGS = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']


def _only_on(platform):
    """Return the df_movies rows flagged 1 for `platform` and 0 for every other platform."""
    selector = df_movies[platform] == 1
    for other in _PLATFORM_FLAGS:
        if other != platform:
            selector = selector & (df_movies[other] == 0)
    return df_movies[selector]


df_netflix_only_movies = _only_on('Netflix')
df_hulu_only_movies = _only_on('Hulu')
df_prime_video_only_movies = _only_on('Prime Video')
df_disney_only_movies = _only_on('Disney+')
# Keep only the titles whose runtime is known ("NA" is the placeholder used
# for a missing runtime) and store the runtime as an integer.
has_runtime = df_movies['Runtime'] != "NA"
df_movies_runtimes = df_movies[has_runtime].copy()
df_movies_runtimes['Runtime'] = df_movies_runtimes['Runtime'].astype(int)

# One runtime frame per streaming platform (a title may appear in several).
netflix_runtimes_movies = df_movies_runtimes[df_movies_runtimes['Netflix'] == 1]
hulu_runtimes_movies = df_movies_runtimes[df_movies_runtimes['Hulu'] == 1]
prime_video_runtimes_movies = df_movies_runtimes[df_movies_runtimes['Prime Video'] == 1]
disney_runtimes_movies = df_movies_runtimes[df_movies_runtimes['Disney+'] == 1]

# Independent copy of the runtime frame.
df_movies_runtimes_group = df_movies_runtimes.copy()

# Same data with an extra 'Screentime' column: Runtime divided by 60,
# rounded to two decimals.
df_movies_screentimes = df_movies_runtimes.copy()
df_movies_screentimes['Screentime'] = (df_movies_runtimes['Runtime'] / 60).round(2)

# One screentime frame per streaming platform.
netflix_screentimes_movies = df_movies_screentimes[df_movies_screentimes['Netflix'] == 1]
hulu_screentimes_movies = df_movies_screentimes[df_movies_screentimes['Hulu'] == 1]
prime_video_screentimes_movies = df_movies_screentimes[df_movies_screentimes['Prime Video'] == 1]
disney_screentimes_movies = df_movies_screentimes[df_movies_screentimes['Disney+'] == 1]
# Pairwise correlation of the numeric (and boolean) columns only; text columns
# cannot be correlated, so they are excluded explicitly instead of relying on
# DataFrame.corr() to drop them.
corr = df_movies_runtimes.select_dtypes(include=['number', 'bool']).corr()
# Exactly one figure/axes pair is created for the heat map (an additional
# plt.figure() call would only leave an empty figure in the cell output).
fig, ax = plt.subplots(figsize=(10, 8))
# annot=True writes each coefficient into its cell and fmt keeps two decimals.
# The heat map labels its ticks with the column names itself, so no manual
# xticks/yticks are applied on top of it.
sns.heatmap(corr, cmap='magma', annot=True, fmt=".2f", ax=ax)
# show plot
plt.show()
<Figure size 720x720 with 0 Axes>
# Rank every movie from longest to shortest runtime and renumber the rows from
# 0, so that row 0 is the longest title.
df_runtimes_high_movies = (
    df_movies_runtimes
    .sort_values('Runtime', ascending=False)
    .reset_index(drop=True)
)
print('\nMovies with Highest Ever Runtime are : \n')
# Bare expression: displayed as the cell result in the notebook.
df_runtimes_high_movies.head(5)
Movies with Highest Ever Runtime are :
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16741 | The Remarkable 20th Century | 2004 | NR | 7.8 | NA | Scott Popjes,Steven Vosburgh | Howard K. Smith,Jimmy Hodson | Documentary | United States | NA | This four-part series takes an in-depth look a... | 600 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 1 | 16720 | The Ultimate Civil War Series: 150th Anniversa... | 2012 | NR | 6.9 | NA | Kevin R. Hershberger | Steve Alexander,Randy Allen,Coby Batty,Scott W... | Documentary,Action,Drama,History,War | United States | English | NA | 353 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 2 | 12686 | Custer's Last Stand | 1936 | NR | 4.7 | NA | Elmer Clifton | Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... | Adventure,History,Romance,War,Western | United States | English | A cruel and ruthless bandit kills a tavern own... | 328 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 3 | 3755 | Dina | 2017 | 13 | 6.8 | 98 | Denis Villeneuve | Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... | Adventure,Drama,Sci-Fi | Canada,Hungary,United States | English | A mythic and emotionally charged hero's journe... | 265 | movie | 0 | 1 | 0 | 0 | 0 | Hulu |
| 4 | 5520 | The Greatest Story Ever Told | 1965 | 0 | 6.6 | 41 | George Stevens,David Lean,Jean Negulesco | Max von Sydow,Michael Anderson Jr.,Carroll Bak... | Biography,Drama,History | United States | English | At 30, Patrick O'Brien was TransFatty, a New Y... | 260 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
# Bar chart of the first 15 rows of the longest-first ranking (all platforms);
# the bar colour encodes the runtime as well.
fig = px.bar(
    x=df_runtimes_high_movies['Runtime'].head(15),
    y=df_runtimes_high_movies['Title'].head(15),
    color=df_runtimes_high_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Highest Runtime in Minutes : All Platforms',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# Rank every movie from shortest to longest runtime and renumber the rows from
# 0, so that row 0 is the shortest title.
df_runtimes_low_movies = (
    df_movies_runtimes
    .sort_values('Runtime', ascending=True)
    .reset_index(drop=True)
)
print('\nMovies with Lowest Ever Runtime are : \n')
# Bare expression: displayed as the cell result in the notebook.
df_runtimes_low_movies.head(5)
Movies with Lowest Ever Runtime are :
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 13905 | Thanksgiving | 2014 | NR | 7.7 | 100 | Eli Roth | Mark Bakunas,Vendula Bednarova,Chris Briggs,Da... | Short,Comedy,Horror | United States | English | Anthony Dexter---bare-chested most of the film... | 2 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 1 | 15583 | Jurassic Africa | 2018 | NR | 6.6 | NA | NA | Rick Carter,Gerald R. Molen,Steven Spielberg | Short | United States | English | Baseball Hall of Famer Reggie Jackson provides... | 2 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 2 | 15971 | Luxo Jr. | 1986 | 0 | 7.3 | NA | John Lasseter | NA | Animation,Short,Family | United States | None | Alameda Slim (Randy Quaid), a wanted cattle ru... | 2 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ |
| 3 | 14728 | #LoveSwag | 2015 | 13 | 4.4 | NA | Austin Davoren | Giovanni Watson,Yaritza Betancourt,Shaun Royer | Short,Comedy,Drama,Romance | United States | English | Dead bodies are being found in the New York ha... | 2 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 4 | 16572 | A Brief History | 2016 | NR | 7.5 | NA | Ion Popescu-Gopo | NA | Animation,Short | Romania | Romanian | Three-part series, Around the Way, celebrates ... | 3 | movie | 0 | 1 | 0 | 0 | 0 | Hulu |
# Bar chart of the first 15 rows of the shortest-first ranking (all platforms);
# the bar colour encodes the runtime as well.
fig = px.bar(
    x=df_runtimes_low_movies['Runtime'].head(15),
    y=df_runtimes_low_movies['Title'].head(15),
    color=df_runtimes_low_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Lowest Runtime in Minutes : All Platforms',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# Text summary of the runtime column: how many distinct values exist, the
# distinct values from largest to smallest, and the titles in row 0 of the
# longest-first and shortest-first rankings.
_runtime_column = df_movies_runtimes['Runtime']
_runtimes_descending = df_movies_runtimes.sort_values(by='Runtime', ascending=False)['Runtime'].unique()
print(f'''
Total '{len(_runtime_column.unique())}' unique Runtime s were Given, They were Like this,\n
{_runtimes_descending}\n
The Highest Ever Runtime Ever Any Movie Got is '{df_runtimes_high_movies.at[0, 'Title']}' : '{df_runtimes_high_movies['Runtime'].max()}'\n
The Lowest Ever Runtime Ever Any Movie Got is '{df_runtimes_low_movies.at[0, 'Title']}' : '{df_runtimes_low_movies['Runtime'].min()}'\n
''')
Total '220' unique Runtime s were Given, They were Like this,
[600 353 328 265 260 259 258 256 255 242 240 238 233 227 224 220 216 215
213 212 210 209 206 204 201 200 197 195 194 193 192 191 189 188 187 186
185 184 183 182 181 180 179 178 177 176 175 174 173 172 171 170 169 168
167 166 165 164 163 162 161 160 159 158 157 156 155 154 153 152 151 150
149 148 147 146 145 144 143 142 141 140 139 138 137 136 135 134 133 132
131 130 129 128 127 126 125 124 123 122 121 120 119 118 117 116 115 114
113 112 111 110 109 108 107 106 105 104 103 102 101 100 99 98 97 96
95 94 93 92 91 90 89 88 87 86 85 84 83 82 81 80 79 78
77 76 75 74 73 72 71 70 69 68 67 66 65 64 63 62 61 60
59 58 57 56 55 54 53 52 51 50 49 48 47 46 45 44 43 42
41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26 25 24
23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6
5 4 3 2]
The Highest Ever Runtime Ever Any Movie Got is 'The Remarkable 20th Century' : '600'
The Lowest Ever Runtime Ever Any Movie Got is 'Thanksgiving' : '2'
# Netflix slices of the two global rankings; the source frames are already
# sorted, so filtering keeps that order and only the row numbers are rebuilt.
netflix_runtimes_high_movies = (
    df_runtimes_high_movies[df_runtimes_high_movies['Netflix'].eq(1)]
    .reset_index(drop=True)
)
netflix_runtimes_low_movies = (
    df_runtimes_low_movies[df_runtimes_low_movies['Netflix'].eq(1)]
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
netflix_runtimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2207 | The Gospel of Matthew | 2014 | 0 | 7.7 | NA | Regardt van den Bergh | Richard Kiley,Bruce Marchiano,Gerrit Schoonhov... | Biography,Drama,History | South Africa | English | Matthew 15:1 - 28:20 - The year is about 62 A.... | 258 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 1 | 80 | Lagaan: Once Upon a Time in India | 2001 | 7 | 8.1 | 95 | Ashutosh Gowariker | Aamir Khan,Gracy Singh,Rachel Shelley,Paul Bla... | Drama,Musical,Sport | India,United Kingdom | Hindi,English | This is the story about the resilience shown b... | 224 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 2 | 2248 | Jatt James Bond | 2014 | 7 | 6.7 | NA | Rohit Jugraj | Gippy Grewal,Zareen Khan,Gurpreet Ghuggi,Yashp... | Comedy | India | Punjabi | NA | 220 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 3 | 2485 | The Gospel of Luke | 2015 | NR | 7.1 | NA | David Batty | Selva Rasalingam,Karima Gouit,Mourad Zaoui,El ... | Drama | United States,United Kingdom,Morocco | English,Spanish | NA | 215 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
| 4 | 368 | Jodhaa Akbar | 2008 | 13 | 7.6 | 75 | Ashutosh Gowariker | Hrithik Roshan,Aishwarya Rai Bachchan,Sonu Soo... | Action,Drama,History,Romance,War | India | Hindi,Urdu | Jodhaa Akbar is a sixteenth century love story... | 213 | movie | 1 | 0 | 0 | 0 | 0 | Netflix |
# First 15 rows of the Netflix longest-first ranking.
fig = px.bar(
    x=netflix_runtimes_high_movies['Runtime'].head(15),
    y=netflix_runtimes_high_movies['Title'].head(15),
    color=netflix_runtimes_high_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Highest Runtime in Minutes : Netflix',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# First 15 rows of the Netflix shortest-first ranking.
fig = px.bar(
    x=netflix_runtimes_low_movies['Runtime'].head(15),
    y=netflix_runtimes_low_movies['Title'].head(15),
    color=netflix_runtimes_low_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Lowest Runtime in Minutes : Netflix',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# Hulu slices of the two global rankings; the source frames are already
# sorted, so filtering keeps that order and only the row numbers are rebuilt.
hulu_runtimes_high_movies = (
    df_runtimes_high_movies[df_runtimes_high_movies['Hulu'].eq(1)]
    .reset_index(drop=True)
)
hulu_runtimes_low_movies = (
    df_runtimes_low_movies[df_runtimes_low_movies['Hulu'].eq(1)]
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
hulu_runtimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3755 | Dina | 2017 | 13 | 6.8 | 98 | Denis Villeneuve | Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... | Adventure,Drama,Sci-Fi | Canada,Hungary,United States | English | A mythic and emotionally charged hero's journe... | 265 | movie | 0 | 1 | 0 | 0 | 0 | Hulu |
| 1 | 3977 | Dark Shadows: The Haunting of Collinwood | 2009 | 7 | 7.7 | NA | NA | Joan Bennett,Thayer David,Louis Edmonds,Jonath... | Drama,Fantasy,Horror | United States | English | NA | 210 | movie | 0 | 1 | 1 | 0 | 0 | Prime Video |
| 2 | 4201 | Dark Shadows: The Vampire Curse | 2009 | NR | 7.7 | NA | NA | Joan Bennett,Thayer David,Louis Edmonds,Jonath... | Drama,Fantasy,Horror | United States | English | NA | 210 | movie | 0 | 1 | 1 | 0 | 0 | Prime Video |
| 3 | 3464 | The Green Mile | 1999 | 16 | 8.6 | 78 | Frank Darabont | Tom Hanks,David Morse,Bonnie Hunt,Michael Clar... | Crime,Drama,Fantasy,Mystery | United States | English,French | Death Row guards at a penitentiary, in the 193... | 189 | movie | 0 | 1 | 0 | 0 | 0 | Hulu |
| 4 | 16580 | Fear Box | 2018 | 13 | 6.2 | NA | Michael Bay | Ben Affleck,Josh Hartnett,Kate Beckinsale,Will... | Action,Drama,History,Romance,War | United States | English,Japanese,French | NA | 183 | movie | 0 | 1 | 0 | 0 | 0 | Hulu |
# First 15 rows of the Hulu longest-first ranking.
fig = px.bar(
    x=hulu_runtimes_high_movies['Runtime'].head(15),
    y=hulu_runtimes_high_movies['Title'].head(15),
    color=hulu_runtimes_high_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Highest Runtime in Minutes : Hulu',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# First 15 rows of the Hulu shortest-first ranking.
fig = px.bar(
    x=hulu_runtimes_low_movies['Runtime'].head(15),
    y=hulu_runtimes_low_movies['Title'].head(15),
    color=hulu_runtimes_low_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Lowest Runtime in Minutes : Hulu',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# Prime Video slices of the two global rankings; the source frames are already
# sorted, so filtering keeps that order and only the row numbers are rebuilt.
prime_video_runtimes_high_movies = (
    df_runtimes_high_movies[df_runtimes_high_movies['Prime Video'].eq(1)]
    .reset_index(drop=True)
)
prime_video_runtimes_low_movies = (
    df_runtimes_low_movies[df_runtimes_low_movies['Prime Video'].eq(1)]
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
prime_video_runtimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16741 | The Remarkable 20th Century | 2004 | NR | 7.8 | NA | Scott Popjes,Steven Vosburgh | Howard K. Smith,Jimmy Hodson | Documentary | United States | NA | This four-part series takes an in-depth look a... | 600 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 1 | 16720 | The Ultimate Civil War Series: 150th Anniversa... | 2012 | NR | 6.9 | NA | Kevin R. Hershberger | Steve Alexander,Randy Allen,Coby Batty,Scott W... | Documentary,Action,Drama,History,War | United States | English | NA | 353 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 2 | 12686 | Custer's Last Stand | 1936 | NR | 4.7 | NA | Elmer Clifton | Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... | Adventure,History,Romance,War,Western | United States | English | A cruel and ruthless bandit kills a tavern own... | 328 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 3 | 5520 | The Greatest Story Ever Told | 1965 | 0 | 6.6 | 41 | George Stevens,David Lean,Jean Negulesco | Max von Sydow,Michael Anderson Jr.,Carroll Bak... | Biography,Drama,History | United States | English | At 30, Patrick O'Brien was TransFatty, a New Y... | 260 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
| 4 | 4536 | Tom Petty and the Heartbreakers: Runnin' Down ... | 2007 | NR | 8.6 | 100 | Peter Bogdanovich | Neil Armstrong,Mick Avory,Ron Blair,Peter Bogd... | Documentary,Music | United States | English | NA | 259 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video |
# First 15 rows of the Prime Video longest-first ranking.
fig = px.bar(
    x=prime_video_runtimes_high_movies['Runtime'].head(15),
    y=prime_video_runtimes_high_movies['Title'].head(15),
    color=prime_video_runtimes_high_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Highest Runtime in Minutes : Prime Video',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# First 15 rows of the Prime Video shortest-first ranking.
fig = px.bar(
    x=prime_video_runtimes_low_movies['Runtime'].head(15),
    y=prime_video_runtimes_low_movies['Title'].head(15),
    color=prime_video_runtimes_low_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Lowest Runtime in Minutes : Prime Video',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# Disney+ slices of the two global rankings; the source frames are already
# sorted, so filtering keeps that order and only the row numbers are rebuilt.
disney_runtimes_high_movies = (
    df_runtimes_high_movies[df_runtimes_high_movies['Disney+'].eq(1)]
    .reset_index(drop=True)
)
disney_runtimes_low_movies = (
    df_runtimes_low_movies[df_runtimes_low_movies['Disney+'].eq(1)]
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
disney_runtimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 15735 | Avengers: Endgame | 2019 | 13 | 8.4 | 94 | Anthony Russo,Joe Russo | Robert Downey Jr.,Chris Evans,Mark Ruffalo,Chr... | Action,Adventure,Drama,Sci-Fi | United States | English,Japanese,Xhosa,German | An elderly man reads the book "The Princess Br... | 181 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ |
| 1 | 15774 | The Sound of Music | 1965 | 0 | 8 | 83 | Robert Wise | Julie Andrews,Christopher Plummer,Eleanor Park... | Biography,Drama,Family,Musical,Romance | United States | English,German | In this animated comedy from the folks at Disn... | 172 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ |
| 2 | 15803 | Pirates of the Caribbean: At World's End | 2007 | 13 | 7.1 | 44 | Gore Verbinski | Johnny Depp,Geoffrey Rush,Orlando Bloom,Keira ... | Action,Adventure,Fantasy | United States | English | The Good Dinosaur asks the question: What if t... | 169 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ |
| 3 | 15970 | Around the World in 80 Days | 2004 | 0 | 6.8 | 32 | Michael Anderson,John Farrow | Cantinflas,Finlay Currie,Robert Morley,Ronald ... | Adventure,Comedy,Family,Romance | United States | English,Spanish,French | Race car driver, Jim Douglas goes to Monte Car... | 167 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ |
| 4 | 15793 | Star Wars: The Last Jedi | 2017 | 13 | 7 | 90 | Rian Johnson | Mark Hamill,Carrie Fisher,Adam Driver,Daisy Ri... | Action,Adventure,Fantasy,Sci-Fi | United States | English | While living the quiet life in a swamp, Kermit... | 152 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ |
# First 15 rows of the Disney+ longest-first ranking.
fig = px.bar(
    x=disney_runtimes_high_movies['Runtime'].head(15),
    y=disney_runtimes_high_movies['Title'].head(15),
    color=disney_runtimes_high_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Highest Runtime in Minutes : Disney+',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# First 15 rows of the Disney+ shortest-first ranking.
fig = px.bar(
    x=disney_runtimes_low_movies['Runtime'].head(15),
    y=disney_runtimes_low_movies['Title'].head(15),
    color=disney_runtimes_low_movies['Runtime'].head(15),
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies'},
    title='Movies with Lowest Runtime in Minutes : Disney+',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor='white')
fig.show()
# Report the title in row 0 of each longest-first / shortest-first ranking
# together with the maximum / minimum runtime of that ranking, first for all
# platforms combined and then platform by platform.
_extreme_rankings = (
    ("Ever Got", df_runtimes_high_movies, df_runtimes_low_movies),
    ("on 'Netflix'", netflix_runtimes_high_movies, netflix_runtimes_low_movies),
    ("on 'Hulu'", hulu_runtimes_high_movies, hulu_runtimes_low_movies),
    ("on 'Prime Video'", prime_video_runtimes_high_movies, prime_video_runtimes_low_movies),
    ("on 'Disney+'", disney_runtimes_high_movies, disney_runtimes_low_movies),
)
# The message starts with an empty line and every sentence is followed by one.
_extremes_report = "\n"
for _scope, _longest_first, _shortest_first in _extreme_rankings:
    _extremes_report += (
        f"The Movie with Highest Runtime {_scope} is "
        f"'{_longest_first.at[0, 'Title']}' : '{_longest_first['Runtime'].max()}'\n\n"
    )
    _extremes_report += (
        f"The Movie with Lowest Runtime {_scope} is "
        f"'{_shortest_first.at[0, 'Title']}' : '{_shortest_first['Runtime'].min()}'\n\n"
    )
print(_extremes_report)
The Movie with Highest Runtime Ever Got is 'The Remarkable 20th Century' : '600'
The Movie with Lowest Runtime Ever Got is 'Thanksgiving' : '2'
The Movie with Highest Runtime on 'Netflix' is 'The Gospel of Matthew' : '258'
The Movie with Lowest Runtime on 'Netflix' is 'Silent' : '3'
The Movie with Highest Runtime on 'Hulu' is 'Dina' : '265'
The Movie with Lowest Runtime on 'Hulu' is 'A Brief History' : '3'
The Movie with Highest Runtime on 'Prime Video' is 'The Remarkable 20th Century' : '600'
The Movie with Lowest Runtime on 'Prime Video' is 'Thanksgiving' : '2'
The Movie with Highest Runtime on 'Disney+' is 'Avengers: Endgame' : '181'
The Movie with Lowest Runtime on 'Disney+' is 'Luxo Jr.' : '2'
# Mean runtime, rounded to two decimals, for the whole catalogue and then for
# each platform's frame.
_averages_report = (
    "\nAccross All Platforms the Average Runtime is "
    f"'{round(df_movies_runtimes['Runtime'].mean(), 2)}'\n\n"
)
for _platform_name, _platform_frame in (
    ('Netflix', netflix_runtimes_movies),
    ('Hulu', hulu_runtimes_movies),
    ('Prime Video', prime_video_runtimes_movies),
    ('Disney+', disney_runtimes_movies),
):
    _averages_report += (
        f"The Average Runtime on '{_platform_name}' is "
        f"'{round(_platform_frame['Runtime'].mean(), 2)}'\n\n"
    )
print(_averages_report)
Accross All Platforms the Average Runtime is '94.08'
The Average Runtime on 'Netflix' is '100.04'
The Average Runtime on 'Hulu' is '97.49'
The Average Runtime on 'Prime Video' is '92.71'
The Average Runtime on 'Disney+' is '91.59'
# Distribution of all movie runtimes: histogram with a KDE overlay on the left
# axes, box plot on the right axes.
f, ax = plt.subplots(1, 2, figsize=(20, 5))
# sns.distplot is deprecated; histplot (already used elsewhere in this file)
# with a density-normalised histogram and a KDE overlay is its replacement.
sns.histplot(
    df_movies_runtimes['Runtime'],
    bins=20,
    kde=True,
    stat='density',
    kde_kws={'cut': 3},
    ax=ax[0],
)
# Pass the data as `x` explicitly so the box is drawn horizontally regardless
# of how a given seaborn version interprets the first positional argument.
sns.boxplot(x=df_movies_runtimes['Runtime'], ax=ax[1])
plt.show()
# One shared figure with an overlaid histogram + KDE per platform.
plt.figure(figsize=(20, 5))
plt.title('Runtime s Per Platform')
# Only the first 100 rows of each platform frame are plotted.  The plotting
# order matters: it must match the order of the legend labels below.
for _platform_frame, _platform_colour in (
    (prime_video_runtimes_movies, 'lightblue'),
    (netflix_runtimes_movies, 'red'),
    (hulu_runtimes_movies, 'lightgreen'),
    (disney_runtimes_movies, 'darkblue'),
):
    sns.histplot(_platform_frame['Runtime'].head(100), color=_platform_colour, legend=True, kde=True)
# Legend labels, listed in plotting order.
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
def round_val(data):
    """Round *data* with the built-in ``round``, passing missing values through.

    Args:
        data: A number, ``None``, or a NaN-like value (anything whose string
            form is ``'nan'``, e.g. ``float('nan')``).

    Returns:
        ``round(data)`` for ordinary numbers; ``None`` when *data* is ``None``
        or NaN-like.
    """
    # The string comparison recognises float and NumPy NaN alike; None is
    # handled explicitly because round(None) would raise a TypeError.
    if data is None or str(data) == 'nan':
        return None
    return round(data)
def round_fix(data):
    """Map a runtime to the upper edge of its runtime bucket.

    Buckets are 50 wide up to 1350 (0-50 -> 50, 51-100 -> 100, ...,
    1301-1350 -> 1350); everything from 1351 through 2000 falls into one
    final bucket labelled 2000.

    Args:
        data: The runtime to bucket. Integer values behave exactly like the
            original range-based lookup; non-integer numbers (e.g. 50.5) are
            placed in the bucket that contains them instead of being dropped.

    Returns:
        The bucket's upper edge as an ``int``, or ``None`` when *data* is
        negative, greater than 2000, NaN, or not comparable to a number.
    """
    import math

    try:
        # NaN fails both comparisons, so it is rejected here as well.
        if not 0 <= data <= 2000:
            return None
    except TypeError:
        # Non-numeric input (None, strings, ...) has no bucket.
        return None
    if data > 1350:
        # The last bucket is wider than the others: 1351..2000 -> 2000.
        return 2000
    # Round up to the next multiple of 50; 0 belongs to the first bucket (50).
    return max(50, math.ceil(data / 50) * 50)
# Label every movie with its runtime bucket (see round_fix).
df_movies_runtimes_group['Runtime Group'] = df_movies_runtimes['Runtime'].apply(round_fix)
# Number of movies per bucket, ordered from the highest bucket downwards.
_bucket_sizes = df_movies_runtimes_group['Runtime Group'].value_counts().sort_index(ascending=False)
runtimes_values = _bucket_sizes.tolist()
runtimes_index = _bucket_sizes.index
# Per bucket: count of titles, plus the sum of each platform flag column.
_grouped_by_bucket = df_movies_runtimes_group.groupby('Runtime Group')
runtimes_group_count = _grouped_by_bucket['Title'].count()
runtimes_group_movies = _grouped_by_bucket[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
# Combine both aggregates into one table, ordered by movie count (largest first).
runtimes_group_data_movies = (
    pd.concat([runtimes_group_count, runtimes_group_movies], axis=1)
    .reset_index()
    .rename(columns={'Title': 'Movies Count'})
    .sort_values(by='Movies Count', ascending=False)
)
# Runtime Group with Movies Counts - All Platforms Combined
# Bare expression: displayed as the cell result in the notebook.
runtimes_group_data_movies.sort_values(by='Movies Count', ascending=False)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 1 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 2 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 0 | 50 | 636 | 144 | 28 | 442 | 46 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
| 5 | 300 | 6 | 1 | 1 | 4 | 0 |
| 6 | 350 | 1 | 0 | 0 | 1 | 0 |
| 7 | 400 | 1 | 0 | 0 | 1 | 0 |
| 8 | 600 | 1 | 0 | 0 | 1 | 0 |
# Same table, ordered from the highest runtime bucket to the lowest
# (bare expression: displayed as the cell result in the notebook).
runtimes_group_data_movies.sort_values('Runtime Group', ascending=False)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 8 | 600 | 1 | 0 | 0 | 1 | 0 |
| 7 | 400 | 1 | 0 | 0 | 1 | 0 |
| 6 | 350 | 1 | 0 | 0 | 1 | 0 |
| 5 | 300 | 6 | 1 | 1 | 4 | 0 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 2 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 1 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 0 | 50 | 636 | 144 | 28 | 442 | 46 |
# Movie count per runtime bucket for all platforms combined; the bar colour
# encodes the bucket value.
fig = px.bar(
    x=runtimes_group_data_movies['Runtime Group'],
    y=runtimes_group_data_movies['Movies Count'],
    color=runtimes_group_data_movies['Runtime Group'],
    labels={'x': 'Runtime : In Minutes', 'y': 'Movies Count'},
    title='Movies with Group Runtime in Minutes : All Platforms',
    color_continuous_scale='Teal_r',
)
fig.update_layout(plot_bgcolor="white")
fig.show()
# Share of movies per runtime bucket, limited to the first 10 rows of the
# table.  The slice is taken once and its columns are referenced by name, so
# the frame and the plotted columns always have the same length (passing the
# unsliced Series next to a sliced frame fails as soon as there are more than
# 10 buckets).
_pie_groups = runtimes_group_data_movies[:10]
fig = px.pie(
    _pie_groups,
    names='Runtime Group',
    values='Movies Count',
    color='Movies Count',
    color_discrete_sequence=px.colors.sequential.Teal,
)
fig.update_traces(textinfo='percent+label',
                  title='Movies Count based on Runtime Group')
fig.show()
# Runtime buckets ranked by movie count, largest first, with rows renumbered
# from 0 so that row 0 is the most populated bucket.
df_runtimes_group_high_movies = (
    runtimes_group_data_movies
    .sort_values('Movies Count', ascending=False)
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
df_runtimes_group_high_movies.head(5)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 1 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 2 | 50 | 636 | 144 | 28 | 442 | 46 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
# Runtime buckets ranked by movie count, smallest first, with rows renumbered
# from 0 so that row 0 is the least populated bucket.
df_runtimes_group_low_movies = (
    runtimes_group_data_movies
    .sort_values('Movies Count', ascending=True)
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
df_runtimes_group_low_movies.head(5)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 350 | 1 | 0 | 0 | 1 | 0 |
| 1 | 400 | 1 | 0 | 0 | 1 | 0 |
| 2 | 600 | 1 | 0 | 0 | 1 | 0 |
| 3 | 300 | 6 | 1 | 1 | 4 | 0 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
# Text summary of the runtime buckets: number of titles with a runtime, number
# of distinct buckets, the buckets ordered by movie count, and the buckets in
# row 0 of the largest-first and smallest-first rankings.
_movies_per_group = runtimes_group_data_movies['Movies Count']
_groups_by_count = runtimes_group_data_movies.sort_values(by='Movies Count', ascending=False)['Runtime Group'].unique()
print(f'''
Total '{df_movies_runtimes['Runtime'].count()}' Titles are available on All Platforms, out of which\n
You Can Choose to see Movies from Total '{len(runtimes_group_data_movies['Runtime Group'].unique())}' Runtime Group, They were Like this, \n
{_groups_by_count} etc. \n
The Runtime Group with Highest Movies Count have '{_movies_per_group.max()}' Movies Available is '{df_runtimes_group_high_movies.at[0, 'Runtime Group']}', &\n
The Runtime Group with Lowest Movies Count have '{_movies_per_group.min()}' Movies Available is '{df_runtimes_group_low_movies.at[0, 'Runtime Group']}'
''')
Total '16541' Titles are available on All Platforms, out of which
You Can Choose to see Movies from Total '9' Runtime Group, They were Like this,
[100 150 50 200 250 300 350 400 600] etc.
The Runtime Group with Highest Movies Count have '10787' Movies Available is '100', &
The Runtime Group with Lowest Movies Count have '1' Movies Available is '350'
# Buckets that contain at least one Netflix title, ordered by the Netflix
# count (largest first), with the other count columns removed.
netflix_runtimes_group_movies = (
    runtimes_group_data_movies[runtimes_group_data_movies['Netflix'].ne(0)]
    .sort_values('Netflix', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Hulu', 'Prime Video', 'Disney+', 'Movies Count'])
)
# Full bucket table ranked by the Netflix count: largest first ...
netflix_runtimes_group_high_movies = (
    df_runtimes_group_high_movies
    .sort_values('Netflix', ascending=False)
    .reset_index(drop=True)
)
# ... and smallest first (the same source frame, re-sorted ascending).
netflix_runtimes_group_low_movies = (
    df_runtimes_group_high_movies
    .sort_values('Netflix', ascending=True)
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
netflix_runtimes_group_high_movies.head(5)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 1 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 2 | 50 | 636 | 144 | 28 | 442 | 46 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
# Buckets that contain at least one Hulu title, ordered by the Hulu count
# (largest first), with the other count columns removed.
hulu_runtimes_group_movies = (
    runtimes_group_data_movies[runtimes_group_data_movies['Hulu'].ne(0)]
    .sort_values('Hulu', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Prime Video', 'Disney+', 'Movies Count'])
)
# Full bucket table ranked by the Hulu count: largest first ...
hulu_runtimes_group_high_movies = (
    df_runtimes_group_high_movies
    .sort_values('Hulu', ascending=False)
    .reset_index(drop=True)
)
# ... and smallest first (the same source frame, re-sorted ascending).
hulu_runtimes_group_low_movies = (
    df_runtimes_group_high_movies
    .sort_values('Hulu', ascending=True)
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
hulu_runtimes_group_high_movies.head(5)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 1 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 2 | 50 | 636 | 144 | 28 | 442 | 46 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
# Buckets that contain at least one Prime Video title, ordered by the Prime
# Video count (largest first), with the other count columns removed.
prime_video_runtimes_group_movies = (
    runtimes_group_data_movies[runtimes_group_data_movies['Prime Video'].ne(0)]
    .sort_values('Prime Video', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Hulu', 'Disney+', 'Movies Count'])
)
# Full bucket table ranked by the Prime Video count: largest first ...
prime_video_runtimes_group_high_movies = (
    df_runtimes_group_high_movies
    .sort_values('Prime Video', ascending=False)
    .reset_index(drop=True)
)
# ... and smallest first (the same source frame, re-sorted ascending).
prime_video_runtimes_group_low_movies = (
    df_runtimes_group_high_movies
    .sort_values('Prime Video', ascending=True)
    .reset_index(drop=True)
)
# Bare expression: displayed as the cell result in the notebook.
prime_video_runtimes_group_high_movies.head(5)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 1 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 2 | 50 | 636 | 144 | 28 | 442 | 46 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
# Runtime groups that have at least one Disney+ movie, largest Disney+ count first,
# with the other platforms' counts and the combined count removed.
disney_runtimes_group_movies = (
    runtimes_group_data_movies
    .loc[runtimes_group_data_movies['Disney+'] != 0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'Movies Count'])
)

# Full runtime-group table ranked by its Disney+ count: highest first, then lowest first.
disney_runtimes_group_high_movies = (
    df_runtimes_group_high_movies
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
)
disney_runtimes_group_low_movies = (
    df_runtimes_group_high_movies
    .sort_values(by = 'Disney+', ascending = True)
    .reset_index(drop = True)
)
disney_runtimes_group_high_movies.head(5)
| Runtime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 100 | 10787 | 1788 | 616 | 8328 | 330 |
| 1 | 150 | 4641 | 1415 | 380 | 2978 | 169 |
| 2 | 50 | 636 | 144 | 28 | 442 | 46 |
| 3 | 200 | 444 | 144 | 15 | 311 | 7 |
| 4 | 250 | 24 | 9 | 2 | 17 | 0 |
# Summary of the best / worst runtime group overall and per platform.
# Each table is already ranked, so the row labelled 0 holds the winning group,
# while max()/min() of the ranking column gives its count.
print(f'''
The Runtime Group with Highest Movies Count Ever Got is '{df_runtimes_group_high_movies.loc[0, 'Runtime Group']}' : '{df_runtimes_group_high_movies['Movies Count'].max()}'\n
The Runtime Group with Lowest Movies Count Ever Got is '{df_runtimes_group_low_movies.loc[0, 'Runtime Group']}' : '{df_runtimes_group_low_movies['Movies Count'].min()}'\n
The Runtime Group with Highest Movies Count on 'Netflix' is '{netflix_runtimes_group_high_movies.loc[0, 'Runtime Group']}' : '{netflix_runtimes_group_high_movies['Netflix'].max()}'\n
The Runtime Group with Lowest Movies Count on 'Netflix' is '{netflix_runtimes_group_low_movies.loc[0, 'Runtime Group']}' : '{netflix_runtimes_group_low_movies['Netflix'].min()}'\n
The Runtime Group with Highest Movies Count on 'Hulu' is '{hulu_runtimes_group_high_movies.loc[0, 'Runtime Group']}' : '{hulu_runtimes_group_high_movies['Hulu'].max()}'\n
The Runtime Group with Lowest Movies Count on 'Hulu' is '{hulu_runtimes_group_low_movies.loc[0, 'Runtime Group']}' : '{hulu_runtimes_group_low_movies['Hulu'].min()}'\n
The Runtime Group with Highest Movies Count on 'Prime Video' is '{prime_video_runtimes_group_high_movies.loc[0, 'Runtime Group']}' : '{prime_video_runtimes_group_high_movies['Prime Video'].max()}'\n
The Runtime Group with Lowest Movies Count on 'Prime Video' is '{prime_video_runtimes_group_low_movies.loc[0, 'Runtime Group']}' : '{prime_video_runtimes_group_low_movies['Prime Video'].min()}'\n
The Runtime Group with Highest Movies Count on 'Disney+' is '{disney_runtimes_group_high_movies.loc[0, 'Runtime Group']}' : '{disney_runtimes_group_high_movies['Disney+'].max()}'\n
The Runtime Group with Lowest Movies Count on 'Disney+' is '{disney_runtimes_group_low_movies.loc[0, 'Runtime Group']}' : '{disney_runtimes_group_low_movies['Disney+'].min()}'\n
''')
The Runtime Group with Highest Movies Count Ever Got is '100' : '10787'
The Runtime Group with Lowest Movies Count Ever Got is '350' : '1'
The Runtime Group with Highest Movies Count on 'Netflix' is '100' : '1788'
The Runtime Group with Lowest Movies Count on 'Netflix' is '350' : '0'
The Runtime Group with Highest Movies Count on 'Hulu' is '100' : '616'
The Runtime Group with Lowest Movies Count on 'Hulu' is '350' : '0'
The Runtime Group with Highest Movies Count on 'Prime Video' is '100' : '8328'
The Runtime Group with Lowest Movies Count on 'Prime Video' is '350' : '1'
The Runtime Group with Highest Movies Count on 'Disney+' is '100' : '330'
The Runtime Group with Lowest Movies Count on 'Disney+' is '250' : '0'
# 2x2 grid with one bar chart per platform, each drawn from the first ten rows
# of that platform's runtime-group table.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1 = sns.barplot(data = netflix_runtimes_group_movies.head(10), x = 'Runtime Group', y = 'Netflix', palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(data = hulu_runtimes_group_movies.head(10), x = 'Runtime Group', y = 'Hulu', palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(data = prime_video_runtimes_group_movies.head(10), x = 'Runtime Group', y = 'Prime Video', palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(data = disney_runtimes_group_movies.head(10), x = 'Runtime Group', y = 'Disney+', palette = 'BuPu_r', ax = axes[1, 1])

# Title every panel with its platform name.
n_ru_ax1.set_title(labels[0])
h_ru_ax2.set_title(labels[1])
p_ru_ax3.set_title(labels[2])
d_ru_ax4.set_title(labels[3])
plt.show()
# Movie count per runtime group, one line per platform on a shared axis.
plt.figure(figsize = (20, 5))
# Every line carries an explicit label so the legend can map each colour to its platform;
# without labels the four lines cannot be told apart.
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Netflix'], color = 'red', label = 'Netflix')
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Hulu'], color = 'lightgreen', label = 'Hulu')
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Prime Video'], color = 'lightblue', label = 'Prime Video')
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Disney+'], color = 'darkblue', label = 'Disney+')
plt.xlabel('Runtime Group', fontsize = 15)
# The y-axis is shared by all platforms, so override the label left by the last lineplot call.
plt.ylabel('Movies Count', fontsize = 15)
plt.legend()
plt.show()
# Number of distinct runtime groups overall and in each platform's table.
print(f'''
Accross All Platforms Total Count of Runtime Group is '{len(runtimes_group_data_movies['Runtime Group'].unique())}'\n
Total Count of Runtime Group on 'Netflix' is '{len(netflix_runtimes_group_movies['Runtime Group'].unique())}'\n
Total Count of Runtime Group on 'Hulu' is '{len(hulu_runtimes_group_movies['Runtime Group'].unique())}'\n
Total Count of Runtime Group on 'Prime Video' is '{len(prime_video_runtimes_group_movies['Runtime Group'].unique())}'\n
Total Count of Runtime Group on 'Disney+' is '{len(disney_runtimes_group_movies['Runtime Group'].unique())}'\n
''')
Accross All Platforms Total Count of Runtime Group is '9'
Total Count of Runtime Group on 'Netflix' is '6'
Total Count of Runtime Group on 'Hulu' is '6'
Total Count of Runtime Group on 'Prime Video' is '9'
Total Count of Runtime Group on 'Disney+' is '4'
# 2x2 grid with one line chart per platform, drawn from the combined runtime-group table.
# NOTE(review): the platform count is on x and 'Runtime Group' on y, the reverse of the
# combined line chart elsewhere in this notebook - confirm this orientation is intended.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1 = sns.lineplot(data = runtimes_group_data_movies, x = 'Netflix', y = 'Runtime Group', color = 'red', ax = axes[0, 0])
h_ru_ax2 = sns.lineplot(data = runtimes_group_data_movies, x = 'Hulu', y = 'Runtime Group', color = 'lightgreen', ax = axes[0, 1])
p_ru_ax3 = sns.lineplot(data = runtimes_group_data_movies, x = 'Prime Video', y = 'Runtime Group', color = 'lightblue', ax = axes[1, 0])
d_ru_ax4 = sns.lineplot(data = runtimes_group_data_movies, x = 'Disney+', y = 'Runtime Group', color = 'darkblue', ax = axes[1, 1])

# Title every panel with its platform name.
n_ru_ax1.set_title(labels[0])
h_ru_ax2.set_title(labels[1])
p_ru_ax3.set_title(labels[2])
d_ru_ax4.set_title(labels[3])
plt.show()
# 2x2 grid with one bar chart per platform; unlike the per-platform tables, every panel
# uses the same first ten rows of the combined runtime-group table.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1 = sns.barplot(data = runtimes_group_data_movies.head(10), x = 'Runtime Group', y = 'Netflix', palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(data = runtimes_group_data_movies.head(10), x = 'Runtime Group', y = 'Hulu', palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(data = runtimes_group_data_movies.head(10), x = 'Runtime Group', y = 'Prime Video', palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(data = runtimes_group_data_movies.head(10), x = 'Runtime Group', y = 'Disney+', palette = 'BuPu_r', ax = axes[1, 1])

# Title every panel with its platform name.
n_ru_ax1.set_title(labels[0])
h_ru_ax2.set_title(labels[1])
p_ru_ax3.set_title(labels[2])
d_ru_ax4.set_title(labels[3])
plt.show()
# Rank every movie from the longest to the shortest screentime and renumber the rows from 0,
# so row 0 is always the overall maximum.
df_screentimes_high_movies = (
    df_movies_screentimes
    .sort_values(by = 'Screentime', ascending = False)
    .reset_index(drop = True)
)
print('\nMovies with Highest Ever Screentime are : \n')
df_screentimes_high_movies.head(5)
Movies with Highest Ever Screentime are :
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Screentime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16741 | The Remarkable 20th Century | 2004 | NR | 7.8 | NA | Scott Popjes,Steven Vosburgh | Howard K. Smith,Jimmy Hodson | Documentary | United States | ... | This four-part series takes an in-depth look a... | 600 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 10.00 |
| 1 | 16720 | The Ultimate Civil War Series: 150th Anniversa... | 2012 | NR | 6.9 | NA | Kevin R. Hershberger | Steve Alexander,Randy Allen,Coby Batty,Scott W... | Documentary,Action,Drama,History,War | United States | ... | NA | 353 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 5.88 |
| 2 | 12686 | Custer's Last Stand | 1936 | NR | 4.7 | NA | Elmer Clifton | Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... | Adventure,History,Romance,War,Western | United States | ... | A cruel and ruthless bandit kills a tavern own... | 328 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 5.47 |
| 3 | 3755 | Dina | 2017 | 13 | 6.8 | 98 | Denis Villeneuve | Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... | Adventure,Drama,Sci-Fi | Canada,Hungary,United States | ... | A mythic and emotionally charged hero's journe... | 265 | movie | 0 | 1 | 0 | 0 | 0 | Hulu | 4.42 |
| 4 | 5520 | The Greatest Story Ever Told | 1965 | 0 | 6.6 | 41 | George Stevens,David Lean,Jean Negulesco | Max von Sydow,Michael Anderson Jr.,Carroll Bak... | Biography,Drama,History | United States | ... | At 30, Patrick O'Brien was TransFatty, a New Y... | 260 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 4.33 |
5 rows × 21 columns
# Bar chart of the 15 movies with the highest screentime across all platforms;
# the bars are also coloured by screentime.
fig = px.bar(
    x = df_screentimes_high_movies['Screentime'].head(15),
    y = df_screentimes_high_movies['Title'].head(15),
    color = df_screentimes_high_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Highest Screentime in Hours : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Rank every movie from the shortest to the longest screentime and renumber the rows from 0,
# so row 0 is always the overall minimum.
df_screentimes_low_movies = (
    df_movies_screentimes
    .sort_values(by = 'Screentime', ascending = True)
    .reset_index(drop = True)
)
print('\nMovies with Lowest Ever Screentime are : \n')
df_screentimes_low_movies.head(5)
Movies with Lowest Ever Screentime are :
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Screentime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 13905 | Thanksgiving | 2014 | NR | 7.7 | 100 | Eli Roth | Mark Bakunas,Vendula Bednarova,Chris Briggs,Da... | Short,Comedy,Horror | United States | ... | Anthony Dexter---bare-chested most of the film... | 2 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 0.03 |
| 1 | 15583 | Jurassic Africa | 2018 | NR | 6.6 | NA | NA | Rick Carter,Gerald R. Molen,Steven Spielberg | Short | United States | ... | Baseball Hall of Famer Reggie Jackson provides... | 2 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 0.03 |
| 2 | 15971 | Luxo Jr. | 1986 | 0 | 7.3 | NA | John Lasseter | NA | Animation,Short,Family | United States | ... | Alameda Slim (Randy Quaid), a wanted cattle ru... | 2 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ | 0.03 |
| 3 | 14728 | #LoveSwag | 2015 | 13 | 4.4 | NA | Austin Davoren | Giovanni Watson,Yaritza Betancourt,Shaun Royer | Short,Comedy,Drama,Romance | United States | ... | Dead bodies are being found in the New York ha... | 2 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 0.03 |
| 4 | 16572 | A Brief History | 2016 | NR | 7.5 | NA | Ion Popescu-Gopo | NA | Animation,Short | Romania | ... | Three-part series, Around the Way, celebrates ... | 3 | movie | 0 | 1 | 0 | 0 | 0 | Hulu | 0.05 |
5 rows × 21 columns
# Bar chart of the 15 movies with the lowest screentime across all platforms;
# the bars are also coloured by screentime.
fig = px.bar(
    x = df_screentimes_low_movies['Screentime'].head(15),
    y = df_screentimes_low_movies['Title'].head(15),
    color = df_screentimes_low_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Lowest Screentime in Hours : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Report how many distinct screentime values exist, list them from largest to smallest,
# and name the movies at both extremes (row 0 of each ranked table).
print(f'''
Total '{len(df_movies_screentimes['Screentime'].unique())}' unique Screentime s were Given, They were Like this,\n
{df_movies_screentimes['Screentime'].sort_values(ascending = False).unique()}\n
The Highest Ever Screentime Ever Any Movie Got is '{df_screentimes_high_movies.loc[0, 'Title']}' : '{df_screentimes_high_movies['Screentime'].max()}'\n
The Lowest Ever Screentime Ever Any Movie Got is '{df_screentimes_low_movies.loc[0, 'Title']}' : '{df_screentimes_low_movies['Screentime'].min()}'\n
''')
Total '220' unique Screentime s were Given, They were Like this,
[10. 5.88 5.47 4.42 4.33 4.32 4.3 4.27 4.25 4.03 4. 3.97
3.88 3.78 3.73 3.67 3.6 3.58 3.55 3.53 3.5 3.48 3.43 3.4
3.35 3.33 3.28 3.25 3.23 3.22 3.2 3.18 3.15 3.13 3.12 3.1
3.08 3.07 3.05 3.03 3.02 3. 2.98 2.97 2.95 2.93 2.92 2.9
2.88 2.87 2.85 2.83 2.82 2.8 2.78 2.77 2.75 2.73 2.72 2.7
2.68 2.67 2.65 2.63 2.62 2.6 2.58 2.57 2.55 2.53 2.52 2.5
2.48 2.47 2.45 2.43 2.42 2.4 2.38 2.37 2.35 2.33 2.32 2.3
2.28 2.27 2.25 2.23 2.22 2.2 2.18 2.17 2.15 2.13 2.12 2.1
2.08 2.07 2.05 2.03 2.02 2. 1.98 1.97 1.95 1.93 1.92 1.9
1.88 1.87 1.85 1.83 1.82 1.8 1.78 1.77 1.75 1.73 1.72 1.7
1.68 1.67 1.65 1.63 1.62 1.6 1.58 1.57 1.55 1.53 1.52 1.5
1.48 1.47 1.45 1.43 1.42 1.4 1.38 1.37 1.35 1.33 1.32 1.3
1.28 1.27 1.25 1.23 1.22 1.2 1.18 1.17 1.15 1.13 1.12 1.1
1.08 1.07 1.05 1.03 1.02 1. 0.98 0.97 0.95 0.93 0.92 0.9
0.88 0.87 0.85 0.83 0.82 0.8 0.78 0.77 0.75 0.73 0.72 0.7
0.68 0.67 0.65 0.63 0.62 0.6 0.58 0.57 0.55 0.53 0.52 0.5
0.48 0.47 0.45 0.43 0.42 0.4 0.38 0.37 0.35 0.33 0.32 0.3
0.28 0.27 0.25 0.23 0.22 0.2 0.18 0.17 0.15 0.13 0.12 0.1
0.08 0.07 0.05 0.03]
The Highest Ever Screentime Ever Any Movie Got is 'The Remarkable 20th Century' : '10.0'
The Lowest Ever Screentime Ever Any Movie Got is 'Thanksgiving' : '0.03'
# Netflix-only slices (Netflix flag equal to 1) of the two ranked screentime tables,
# each renumbered from 0 so row 0 is the platform's extreme.
netflix_screentimes_high_movies = (
    df_screentimes_high_movies[df_screentimes_high_movies['Netflix'].eq(1)]
    .reset_index(drop = True)
)
netflix_screentimes_low_movies = (
    df_screentimes_low_movies[df_screentimes_low_movies['Netflix'].eq(1)]
    .reset_index(drop = True)
)
netflix_screentimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Screentime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2207 | The Gospel of Matthew | 2014 | 0 | 7.7 | NA | Regardt van den Bergh | Richard Kiley,Bruce Marchiano,Gerrit Schoonhov... | Biography,Drama,History | South Africa | ... | Matthew 15:1 - 28:20 - The year is about 62 A.... | 258 | movie | 1 | 0 | 0 | 0 | 0 | Netflix | 4.30 |
| 1 | 80 | Lagaan: Once Upon a Time in India | 2001 | 7 | 8.1 | 95 | Ashutosh Gowariker | Aamir Khan,Gracy Singh,Rachel Shelley,Paul Bla... | Drama,Musical,Sport | India,United Kingdom | ... | This is the story about the resilience shown b... | 224 | movie | 1 | 0 | 0 | 0 | 0 | Netflix | 3.73 |
| 2 | 2248 | Jatt James Bond | 2014 | 7 | 6.7 | NA | Rohit Jugraj | Gippy Grewal,Zareen Khan,Gurpreet Ghuggi,Yashp... | Comedy | India | ... | NA | 220 | movie | 1 | 0 | 0 | 0 | 0 | Netflix | 3.67 |
| 3 | 2485 | The Gospel of Luke | 2015 | NR | 7.1 | NA | David Batty | Selva Rasalingam,Karima Gouit,Mourad Zaoui,El ... | Drama | United States,United Kingdom,Morocco | ... | NA | 215 | movie | 1 | 0 | 0 | 0 | 0 | Netflix | 3.58 |
| 4 | 368 | Jodhaa Akbar | 2008 | 13 | 7.6 | 75 | Ashutosh Gowariker | Hrithik Roshan,Aishwarya Rai Bachchan,Sonu Soo... | Action,Drama,History,Romance,War | India | ... | Jodhaa Akbar is a sixteenth century love story... | 213 | movie | 1 | 0 | 0 | 0 | 0 | Netflix | 3.55 |
5 rows × 21 columns
# Netflix: the 15 movies with the highest screentime ...
fig = px.bar(
    x = netflix_screentimes_high_movies['Screentime'].head(15),
    y = netflix_screentimes_high_movies['Title'].head(15),
    color = netflix_screentimes_high_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Highest Screentime in Hours : Netflix',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()

# ... and the 15 with the lowest.
fig = px.bar(
    x = netflix_screentimes_low_movies['Screentime'].head(15),
    y = netflix_screentimes_low_movies['Title'].head(15),
    color = netflix_screentimes_low_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Lowest Screentime in Hours : Netflix',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Hulu-only slices (Hulu flag equal to 1) of the two ranked screentime tables,
# each renumbered from 0 so row 0 is the platform's extreme.
hulu_screentimes_high_movies = (
    df_screentimes_high_movies[df_screentimes_high_movies['Hulu'].eq(1)]
    .reset_index(drop = True)
)
hulu_screentimes_low_movies = (
    df_screentimes_low_movies[df_screentimes_low_movies['Hulu'].eq(1)]
    .reset_index(drop = True)
)
hulu_screentimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Screentime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3755 | Dina | 2017 | 13 | 6.8 | 98 | Denis Villeneuve | Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... | Adventure,Drama,Sci-Fi | Canada,Hungary,United States | ... | A mythic and emotionally charged hero's journe... | 265 | movie | 0 | 1 | 0 | 0 | 0 | Hulu | 4.42 |
| 1 | 3977 | Dark Shadows: The Haunting of Collinwood | 2009 | 7 | 7.7 | NA | NA | Joan Bennett,Thayer David,Louis Edmonds,Jonath... | Drama,Fantasy,Horror | United States | ... | NA | 210 | movie | 0 | 1 | 1 | 0 | 0 | Prime Video | 3.50 |
| 2 | 4201 | Dark Shadows: The Vampire Curse | 2009 | NR | 7.7 | NA | NA | Joan Bennett,Thayer David,Louis Edmonds,Jonath... | Drama,Fantasy,Horror | United States | ... | NA | 210 | movie | 0 | 1 | 1 | 0 | 0 | Prime Video | 3.50 |
| 3 | 3464 | The Green Mile | 1999 | 16 | 8.6 | 78 | Frank Darabont | Tom Hanks,David Morse,Bonnie Hunt,Michael Clar... | Crime,Drama,Fantasy,Mystery | United States | ... | Death Row guards at a penitentiary, in the 193... | 189 | movie | 0 | 1 | 0 | 0 | 0 | Hulu | 3.15 |
| 4 | 16580 | Fear Box | 2018 | 13 | 6.2 | NA | Michael Bay | Ben Affleck,Josh Hartnett,Kate Beckinsale,Will... | Action,Drama,History,Romance,War | United States | ... | NA | 183 | movie | 0 | 1 | 0 | 0 | 0 | Hulu | 3.05 |
5 rows × 21 columns
# Hulu: the 15 movies with the highest screentime ...
fig = px.bar(
    x = hulu_screentimes_high_movies['Screentime'].head(15),
    y = hulu_screentimes_high_movies['Title'].head(15),
    color = hulu_screentimes_high_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Highest Screentime in Hours : Hulu',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()

# ... and the 15 with the lowest.
fig = px.bar(
    x = hulu_screentimes_low_movies['Screentime'].head(15),
    y = hulu_screentimes_low_movies['Title'].head(15),
    color = hulu_screentimes_low_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Lowest Screentime in Hours : Hulu',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Prime Video-only slices (Prime Video flag equal to 1) of the two ranked screentime tables,
# each renumbered from 0 so row 0 is the platform's extreme.
prime_video_screentimes_high_movies = (
    df_screentimes_high_movies[df_screentimes_high_movies['Prime Video'].eq(1)]
    .reset_index(drop = True)
)
prime_video_screentimes_low_movies = (
    df_screentimes_low_movies[df_screentimes_low_movies['Prime Video'].eq(1)]
    .reset_index(drop = True)
)
prime_video_screentimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Screentime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16741 | The Remarkable 20th Century | 2004 | NR | 7.8 | NA | Scott Popjes,Steven Vosburgh | Howard K. Smith,Jimmy Hodson | Documentary | United States | ... | This four-part series takes an in-depth look a... | 600 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 10.00 |
| 1 | 16720 | The Ultimate Civil War Series: 150th Anniversa... | 2012 | NR | 6.9 | NA | Kevin R. Hershberger | Steve Alexander,Randy Allen,Coby Batty,Scott W... | Documentary,Action,Drama,History,War | United States | ... | NA | 353 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 5.88 |
| 2 | 12686 | Custer's Last Stand | 1936 | NR | 4.7 | NA | Elmer Clifton | Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... | Adventure,History,Romance,War,Western | United States | ... | A cruel and ruthless bandit kills a tavern own... | 328 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 5.47 |
| 3 | 5520 | The Greatest Story Ever Told | 1965 | 0 | 6.6 | 41 | George Stevens,David Lean,Jean Negulesco | Max von Sydow,Michael Anderson Jr.,Carroll Bak... | Biography,Drama,History | United States | ... | At 30, Patrick O'Brien was TransFatty, a New Y... | 260 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 4.33 |
| 4 | 4536 | Tom Petty and the Heartbreakers: Runnin' Down ... | 2007 | NR | 8.6 | 100 | Peter Bogdanovich | Neil Armstrong,Mick Avory,Ron Blair,Peter Bogd... | Documentary,Music | United States | ... | NA | 259 | movie | 0 | 0 | 1 | 0 | 0 | Prime Video | 4.32 |
5 rows × 21 columns
# Prime Video: the 15 movies with the highest screentime ...
fig = px.bar(
    x = prime_video_screentimes_high_movies['Screentime'].head(15),
    y = prime_video_screentimes_high_movies['Title'].head(15),
    color = prime_video_screentimes_high_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Highest Screentime in Hours : Prime Video',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()

# ... and the 15 with the lowest.
fig = px.bar(
    x = prime_video_screentimes_low_movies['Screentime'].head(15),
    y = prime_video_screentimes_low_movies['Title'].head(15),
    color = prime_video_screentimes_low_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Lowest Screentime in Hours : Prime Video',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Disney+-only slices (Disney+ flag equal to 1) of the two ranked screentime tables,
# each renumbered from 0 so row 0 is the platform's extreme.
disney_screentimes_high_movies = (
    df_screentimes_high_movies[df_screentimes_high_movies['Disney+'].eq(1)]
    .reset_index(drop = True)
)
disney_screentimes_low_movies = (
    df_screentimes_low_movies[df_screentimes_low_movies['Disney+'].eq(1)]
    .reset_index(drop = True)
)
disney_screentimes_high_movies.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Screentime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 15735 | Avengers: Endgame | 2019 | 13 | 8.4 | 94 | Anthony Russo,Joe Russo | Robert Downey Jr.,Chris Evans,Mark Ruffalo,Chr... | Action,Adventure,Drama,Sci-Fi | United States | ... | An elderly man reads the book "The Princess Br... | 181 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ | 3.02 |
| 1 | 15774 | The Sound of Music | 1965 | 0 | 8 | 83 | Robert Wise | Julie Andrews,Christopher Plummer,Eleanor Park... | Biography,Drama,Family,Musical,Romance | United States | ... | In this animated comedy from the folks at Disn... | 172 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ | 2.87 |
| 2 | 15803 | Pirates of the Caribbean: At World's End | 2007 | 13 | 7.1 | 44 | Gore Verbinski | Johnny Depp,Geoffrey Rush,Orlando Bloom,Keira ... | Action,Adventure,Fantasy | United States | ... | The Good Dinosaur asks the question: What if t... | 169 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ | 2.82 |
| 3 | 15970 | Around the World in 80 Days | 2004 | 0 | 6.8 | 32 | Michael Anderson,John Farrow | Cantinflas,Finlay Currie,Robert Morley,Ronald ... | Adventure,Comedy,Family,Romance | United States | ... | Race car driver, Jim Douglas goes to Monte Car... | 167 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ | 2.78 |
| 4 | 15793 | Star Wars: The Last Jedi | 2017 | 13 | 7 | 90 | Rian Johnson | Mark Hamill,Carrie Fisher,Adam Driver,Daisy Ri... | Action,Adventure,Fantasy,Sci-Fi | United States | ... | While living the quiet life in a swamp, Kermit... | 152 | movie | 0 | 0 | 0 | 1 | 0 | Disney+ | 2.53 |
5 rows × 21 columns
# Disney+: the 15 movies with the highest screentime ...
fig = px.bar(
    x = disney_screentimes_high_movies['Screentime'].head(15),
    y = disney_screentimes_high_movies['Title'].head(15),
    color = disney_screentimes_high_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Highest Screentime in Hours : Disney+',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()

# ... and the 15 with the lowest.
fig = px.bar(
    x = disney_screentimes_low_movies['Screentime'].head(15),
    y = disney_screentimes_low_movies['Title'].head(15),
    color = disney_screentimes_low_movies['Screentime'].head(15),
    color_continuous_scale = 'Teal_r',
    labels = {'x' : 'Screentime : In Hours', 'y' : 'Movies'},
    title = 'Movies with Lowest Screentime in Hours : Disney+',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Longest and shortest movie overall and per platform.
# Each table is already ranked, so the row labelled 0 holds the title at the extreme,
# while max()/min() of 'Screentime' gives its value.
print(f'''
The Movie with Highest Screentime Ever Got is '{df_screentimes_high_movies.loc[0, 'Title']}' : '{df_screentimes_high_movies['Screentime'].max()}'\n
The Movie with Lowest Screentime Ever Got is '{df_screentimes_low_movies.loc[0, 'Title']}' : '{df_screentimes_low_movies['Screentime'].min()}'\n
The Movie with Highest Screentime on 'Netflix' is '{netflix_screentimes_high_movies.loc[0, 'Title']}' : '{netflix_screentimes_high_movies['Screentime'].max()}'\n
The Movie with Lowest Screentime on 'Netflix' is '{netflix_screentimes_low_movies.loc[0, 'Title']}' : '{netflix_screentimes_low_movies['Screentime'].min()}'\n
The Movie with Highest Screentime on 'Hulu' is '{hulu_screentimes_high_movies.loc[0, 'Title']}' : '{hulu_screentimes_high_movies['Screentime'].max()}'\n
The Movie with Lowest Screentime on 'Hulu' is '{hulu_screentimes_low_movies.loc[0, 'Title']}' : '{hulu_screentimes_low_movies['Screentime'].min()}'\n
The Movie with Highest Screentime on 'Prime Video' is '{prime_video_screentimes_high_movies.loc[0, 'Title']}' : '{prime_video_screentimes_high_movies['Screentime'].max()}'\n
The Movie with Lowest Screentime on 'Prime Video' is '{prime_video_screentimes_low_movies.loc[0, 'Title']}' : '{prime_video_screentimes_low_movies['Screentime'].min()}'\n
The Movie with Highest Screentime on 'Disney+' is '{disney_screentimes_high_movies.loc[0, 'Title']}' : '{disney_screentimes_high_movies['Screentime'].max()}'\n
The Movie with Lowest Screentime on 'Disney+' is '{disney_screentimes_low_movies.loc[0, 'Title']}' : '{disney_screentimes_low_movies['Screentime'].min()}'\n
''')
The Movie with Highest Screentime Ever Got is 'The Remarkable 20th Century' : '10.0'
The Movie with Lowest Screentime Ever Got is 'Thanksgiving' : '0.03'
The Movie with Highest Screentime on 'Netflix' is 'The Gospel of Matthew' : '4.3'
The Movie with Lowest Screentime on 'Netflix' is 'Silent' : '0.05'
The Movie with Highest Screentime on 'Hulu' is 'Dina' : '4.42'
The Movie with Lowest Screentime on 'Hulu' is 'A Brief History' : '0.05'
The Movie with Highest Screentime on 'Prime Video' is 'The Remarkable 20th Century' : '10.0'
The Movie with Lowest Screentime on 'Prime Video' is 'Thanksgiving' : '0.03'
The Movie with Highest Screentime on 'Disney+' is 'Avengers: Endgame' : '3.02'
The Movie with Lowest Screentime on 'Disney+' is 'Luxo Jr.' : '0.03'
# Mean screentime overall and per platform, rounded to two decimals.
print(f'''
Accross All Platforms the Average Screentime is '{round(df_movies_screentimes['Screentime'].mean(), 2)}'\n
The Average Screentime on 'Netflix' is '{round(netflix_screentimes_movies['Screentime'].mean(), 2)}'\n
The Average Screentime on 'Hulu' is '{round(hulu_screentimes_movies['Screentime'].mean(), 2)}'\n
The Average Screentime on 'Prime Video' is '{round(prime_video_screentimes_movies['Screentime'].mean(), 2)}'\n
The Average Screentime on 'Disney+' is '{round(disney_screentimes_movies['Screentime'].mean(), 2)}'\n
''')
Accross All Platforms the Average Screentime is '1.57'
The Average Screentime on 'Netflix' is '1.67'
The Average Screentime on 'Hulu' is '1.62'
The Average Screentime on 'Prime Video' is '1.55'
The Average Screentime on 'Disney+' is '1.53'
# Distribution of movie screentime: histogram with a KDE curve (left) and a box plot (right).
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
# sns.distplot is deprecated; histplot is its replacement. stat = 'density' keeps the
# normalised y-axis distplot used when a KDE was drawn, and cut = 3 keeps the KDE tails
# extending past the data the way distplot drew them.
sns.histplot(x = df_movies_screentimes['Screentime'], bins = 20, kde = True, stat = 'density', kde_kws = dict(cut = 3), ax = ax[0])
# Pass the series as x explicitly: the meaning of the first positional argument of
# boxplot differs between seaborn versions, while x = ... always gives a horizontal box.
sns.boxplot(x = df_movies_screentimes['Screentime'], ax = ax[1])
plt.show()
# Overlaid screentime histograms (with KDE curves), one per platform.
plt.figure(figsize = (20, 10))
plt.title('Screentime s Per Platform')
# Each histogram carries its own label. Passing a bare list to plt.legend() attaches the
# names to artists purely by draw order, which can pair a platform with the wrong colour.
# NOTE(review): only the first 100 rows of each platform table are plotted - confirm this
# sampling is intended.
sns.histplot(prime_video_screentimes_movies['Screentime'][:100], color = 'lightblue', label = 'Prime Video', legend = True, kde = True)
sns.histplot(netflix_screentimes_movies['Screentime'][:100], color = 'red', label = 'Netflix', legend = True, kde = True)
sns.histplot(hulu_screentimes_movies['Screentime'][:100], color = 'lightgreen', label = 'Hulu', legend = True, kde = True)
sns.histplot(disney_screentimes_movies['Screentime'][:100], color = 'darkblue', label = 'Disney+', legend = True, kde = True)
# Build the legend from the labels attached above.
plt.legend()
plt.show()
def round_val(data):
    """Round a single value to the nearest integer, skipping missing values.

    Parameters
    ----------
    data : scalar
        A numeric value, or a missing marker (NaN / None).

    Returns
    -------
    int or None
        ``round(data)`` for a present value; ``None`` when ``data`` is missing.
    """
    # pd.isna replaces the previous string comparison against 'nan', which
    # raised TypeError for None because round(None) was attempted.
    if pd.isna(data):
        return None
    return round(data)
# Attach a 'Screentime Group' column (the screentime passed through round_val)
# to a copy of the movies table.
df_movies_screentimes_group = df_movies_screentimes.copy()
df_movies_screentimes_group['Screentime Group'] = df_movies_screentimes['Screentime'].apply(round_val)

# Frequency of every group, ordered by group label from largest to smallest.
screentime_group_frequencies = df_movies_screentimes_group['Screentime Group'].value_counts().sort_index(ascending = False)
screentimes_values = screentime_group_frequencies.tolist()
screentimes_index = screentime_group_frequencies.index
# screentimes_values, screentimes_index

# Per group: number of titles, and the sum of each platform column.
movies_by_screentime_group = df_movies_screentimes_group.groupby('Screentime Group')
screentimes_group_count = movies_by_screentime_group['Title'].count()
screentimes_group_movies = movies_by_screentime_group[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

# Combine both aggregates side by side, turn the group key back into a
# column, and order the rows by how many titles each group holds.
screentimes_group_data_movies = pd.concat([screentimes_group_count, screentimes_group_movies], axis = 1)
screentimes_group_data_movies = screentimes_group_data_movies.reset_index()
screentimes_group_data_movies = screentimes_group_data_movies.rename(columns = {'Title' : 'Movies Count'})
screentimes_group_data_movies = screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
# Screentime Group with Movies Counts - All Platforms Combined
screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 2 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 3 | 3 | 449 | 147 | 15 | 315 | 7 |
| 0 | 0 | 311 | 66 | 13 | 226 | 17 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
| 5 | 5 | 1 | 0 | 0 | 1 | 0 |
| 6 | 6 | 1 | 0 | 0 | 1 | 0 |
| 7 | 10 | 1 | 0 | 0 | 1 | 0 |
# Display the same table ordered by 'Screentime Group', largest group label first
screentimes_group_data_movies.sort_values(by = 'Screentime Group', ascending = False)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 7 | 10 | 1 | 0 | 0 | 1 | 0 |
| 6 | 6 | 1 | 0 | 0 | 1 | 0 |
| 5 | 5 | 1 | 0 | 0 | 1 | 0 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
| 3 | 3 | 449 | 147 | 15 | 315 | 7 |
| 2 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 0 | 0 | 311 | 66 | 13 | 226 | 17 |
# Bar chart: one bar per screentime group, bar height is the 'Movies Count',
# and the bar colour follows the group value on a reversed Teal scale.
fig = px.bar(
    x = screentimes_group_data_movies['Screentime Group'],
    y = screentimes_group_data_movies['Movies Count'],
    color = screentimes_group_data_movies['Screentime Group'],
    color_continuous_scale = 'Teal_r',
    title = 'Movies with Group Screentime in Hours : All Platforms',
    labels = { 'y' : 'Movies Count', 'x' : 'Screentime : In Hours'},
)
fig.update_layout(plot_bgcolor = "white")
fig.show()
# Pie chart: one slice per screentime group, sized by 'Movies Count'.
# The data frame argument is limited to the first 10 rows of the table.
fig = px.pie(
    screentimes_group_data_movies.iloc[:10],
    names = screentimes_group_data_movies['Screentime Group'],
    values = screentimes_group_data_movies['Movies Count'],
    color = screentimes_group_data_movies['Movies Count'],
    color_discrete_sequence = px.colors.sequential.Teal,
)
# Show percentage and label on every slice and give the trace its title
fig.update_traces(
    textinfo = 'percent+label',
    title = 'Movies Count based on Screentime Group',
)
fig.show()
# Screentime groups ranked from the highest to the lowest 'Movies Count'.
# The index is renumbered from 0 so that row 0 is the top-ranked group.
df_screentimes_group_high_movies = (
    screentimes_group_data_movies
    .sort_values(by = 'Movies Count', ascending = False)
    .reset_index(drop = True)
)
df_screentimes_group_high_movies.head(5)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 2 | 3 | 449 | 147 | 15 | 315 | 7 |
| 3 | 0 | 311 | 66 | 13 | 226 | 17 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
# Screentime groups ranked from the lowest to the highest 'Movies Count'.
# The index is renumbered from 0 so that row 0 is the bottom-ranked group.
df_screentimes_group_low_movies = (
    screentimes_group_data_movies
    .sort_values(by = 'Movies Count', ascending = True)
    .reset_index(drop = True)
)
df_screentimes_group_low_movies.head(5)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 5 | 1 | 0 | 0 | 1 | 0 |
| 1 | 6 | 1 | 0 | 0 | 1 | 0 |
| 2 | 10 | 1 | 0 | 0 | 1 | 0 |
| 3 | 4 | 25 | 7 | 3 | 17 | 0 |
| 4 | 0 | 311 | 66 | 13 | 226 | 17 |
# Values reported in the summary below, computed up front.
titles_with_screentime = df_movies_screentimes['Screentime'].count()
screentime_group_total = screentimes_group_data_movies['Screentime Group'].unique().shape[0]
# Distinct groups, listed in order of decreasing 'Movies Count'
screentime_groups_by_count = screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)['Screentime Group'].unique()
largest_group_movies_count = screentimes_group_data_movies['Movies Count'].max()
largest_group_label = df_screentimes_group_high_movies['Screentime Group'][0]
smallest_group_movies_count = screentimes_group_data_movies['Movies Count'].min()
smallest_group_label = df_screentimes_group_low_movies['Screentime Group'][0]
print(f'''
Total '{titles_with_screentime}' Titles are available on All Platforms, out of which\n
You Can Choose to see Movies from Total '{screentime_group_total}' Screentime Group, They were Like this, \n
{screentime_groups_by_count} etc. \n
The Screentime Group with Highest Movies Count have '{largest_group_movies_count}' Movies Available is '{largest_group_label}', &\n
The Screentime Group with Lowest Movies Count have '{smallest_group_movies_count}' Movies Available is '{smallest_group_label}'
''')
Total '16541' Titles are available on All Platforms, out of which
You Can Choose to see Movies from Total '8' Screentime Group, They were Like this,
[ 2 1 3 0 4 5 6 10] etc.
The Screentime Group with Highest Movies Count have '9573' Movies Available is '2', &
The Screentime Group with Lowest Movies Count have '1' Movies Available is '5'
# Groups with a non-zero 'Netflix' total, largest first, reduced to the
# 'Screentime Group' and 'Netflix' columns.
netflix_screentimes_group_movies = (
    screentimes_group_data_movies
    .loc[screentimes_group_data_movies['Netflix'] != 0]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Hulu', 'Prime Video', 'Disney+', 'Movies Count'])
)
# Every group (zero totals included) ranked by the 'Netflix' column,
# once descending and once ascending, each renumbered from 0.
netflix_screentimes_group_high_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
)
netflix_screentimes_group_low_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Netflix', ascending = True)
    .reset_index(drop = True)
)
netflix_screentimes_group_high_movies.head(5)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 2 | 3 | 449 | 147 | 15 | 315 | 7 |
| 3 | 0 | 311 | 66 | 13 | 226 | 17 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
# Groups with a non-zero 'Hulu' total, largest first, reduced to the
# 'Screentime Group' and 'Hulu' columns.
hulu_screentimes_group_movies = (
    screentimes_group_data_movies
    .loc[screentimes_group_data_movies['Hulu'] != 0]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Prime Video', 'Disney+', 'Movies Count'])
)
# Every group (zero totals included) ranked by the 'Hulu' column,
# once descending and once ascending, each renumbered from 0.
hulu_screentimes_group_high_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
)
hulu_screentimes_group_low_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Hulu', ascending = True)
    .reset_index(drop = True)
)
hulu_screentimes_group_high_movies.head(5)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 2 | 3 | 449 | 147 | 15 | 315 | 7 |
| 3 | 0 | 311 | 66 | 13 | 226 | 17 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
# Groups with a non-zero 'Prime Video' total, largest first, reduced to the
# 'Screentime Group' and 'Prime Video' columns.
prime_video_screentimes_group_movies = (
    screentimes_group_data_movies
    .loc[screentimes_group_data_movies['Prime Video'] != 0]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Disney+', 'Movies Count'])
)
# Every group (zero totals included) ranked by the 'Prime Video' column,
# once descending and once ascending, each renumbered from 0.
prime_video_screentimes_group_high_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
)
prime_video_screentimes_group_low_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Prime Video', ascending = True)
    .reset_index(drop = True)
)
prime_video_screentimes_group_high_movies.head(5)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 2 | 3 | 449 | 147 | 15 | 315 | 7 |
| 3 | 0 | 311 | 66 | 13 | 226 | 17 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
# Groups with a non-zero 'Disney+' total, largest first, reduced to the
# 'Screentime Group' and 'Disney+' columns.
disney_screentimes_group_movies = (
    screentimes_group_data_movies
    .loc[screentimes_group_data_movies['Disney+'] != 0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'Movies Count'])
)
# Every group (zero totals included) ranked by the 'Disney+' column,
# once descending and once ascending, each renumbered from 0.
disney_screentimes_group_high_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
)
disney_screentimes_group_low_movies = (
    df_screentimes_group_high_movies
    .sort_values(by = 'Disney+', ascending = True)
    .reset_index(drop = True)
)
disney_screentimes_group_high_movies.head(5)
| Screentime Group | Movies Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | 2 | 9573 | 2332 | 717 | 6681 | 303 |
| 1 | 1 | 6180 | 949 | 294 | 4841 | 225 |
| 2 | 0 | 311 | 66 | 13 | 226 | 17 |
| 3 | 3 | 449 | 147 | 15 | 315 | 7 |
| 4 | 4 | 25 | 7 | 3 | 17 | 0 |
# Summary of the screentime group with the highest / lowest movie count,
# overall and per platform. Each line reads the group label from row 0 of the
# corresponding ranked table and the max/min of the matching count column.
# NOTE(review): the per-platform *_low_movies tables are built from all groups
# without filtering zero totals, so a "Lowest" line can report a group with
# '0' movies on that platform - confirm this is the intended meaning.
print(f'''
The Screentime Group with Highest Movies Count Ever Got is '{df_screentimes_group_high_movies['Screentime Group'][0]}' : '{df_screentimes_group_high_movies['Movies Count'].max()}'\n
The Screentime Group with Lowest Movies Count Ever Got is '{df_screentimes_group_low_movies['Screentime Group'][0]}' : '{df_screentimes_group_low_movies['Movies Count'].min()}'\n
The Screentime Group with Highest Movies Count on 'Netflix' is '{netflix_screentimes_group_high_movies['Screentime Group'][0]}' : '{netflix_screentimes_group_high_movies['Netflix'].max()}'\n
The Screentime Group with Lowest Movies Count on 'Netflix' is '{netflix_screentimes_group_low_movies['Screentime Group'][0]}' : '{netflix_screentimes_group_low_movies['Netflix'].min()}'\n
The Screentime Group with Highest Movies Count on 'Hulu' is '{hulu_screentimes_group_high_movies['Screentime Group'][0]}' : '{hulu_screentimes_group_high_movies['Hulu'].max()}'\n
The Screentime Group with Lowest Movies Count on 'Hulu' is '{hulu_screentimes_group_low_movies['Screentime Group'][0]}' : '{hulu_screentimes_group_low_movies['Hulu'].min()}'\n
The Screentime Group with Highest Movies Count on 'Prime Video' is '{prime_video_screentimes_group_high_movies['Screentime Group'][0]}' : '{prime_video_screentimes_group_high_movies['Prime Video'].max()}'\n
The Screentime Group with Lowest Movies Count on 'Prime Video' is '{prime_video_screentimes_group_low_movies['Screentime Group'][0]}' : '{prime_video_screentimes_group_low_movies['Prime Video'].min()}'\n
The Screentime Group with Highest Movies Count on 'Disney+' is '{disney_screentimes_group_high_movies['Screentime Group'][0]}' : '{disney_screentimes_group_high_movies['Disney+'].max()}'\n
The Screentime Group with Lowest Movies Count on 'Disney+' is '{disney_screentimes_group_low_movies['Screentime Group'][0]}' : '{disney_screentimes_group_low_movies['Disney+'].min()}'\n
''')
The Screentime Group with Highest Movies Count Ever Got is '2' : '9573'
The Screentime Group with Lowest Movies Count Ever Got is '5' : '1'
The Screentime Group with Highest Movies Count on 'Netflix' is '2' : '2332'
The Screentime Group with Lowest Movies Count on 'Netflix' is '5' : '0'
The Screentime Group with Highest Movies Count on 'Hulu' is '2' : '717'
The Screentime Group with Lowest Movies Count on 'Hulu' is '5' : '0'
The Screentime Group with Highest Movies Count on 'Prime Video' is '2' : '6681'
The Screentime Group with Lowest Movies Count on 'Prime Video' is '5' : '1'
The Screentime Group with Highest Movies Count on 'Disney+' is '2' : '303'
The Screentime Group with Lowest Movies Count on 'Disney+' is '4' : '0'
# 2x2 grid with one bar chart per platform, drawn from the first 10 rows of
# each per-platform group table; every panel is titled with its platform.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_scr_ax1 = sns.barplot(
    x = netflix_screentimes_group_movies['Screentime Group'].head(10),
    y = netflix_screentimes_group_movies['Netflix'].head(10),
    palette = 'Reds_r', ax = axes[0, 0])
h_scr_ax2 = sns.barplot(
    x = hulu_screentimes_group_movies['Screentime Group'].head(10),
    y = hulu_screentimes_group_movies['Hulu'].head(10),
    palette = 'Greens_r', ax = axes[0, 1])
p_scr_ax3 = sns.barplot(
    x = prime_video_screentimes_group_movies['Screentime Group'].head(10),
    y = prime_video_screentimes_group_movies['Prime Video'].head(10),
    palette = 'Blues_r', ax = axes[1, 0])
d_scr_ax4 = sns.barplot(
    x = disney_screentimes_group_movies['Screentime Group'].head(10),
    y = disney_screentimes_group_movies['Disney+'].head(10),
    palette = 'BuPu_r', ax = axes[1, 1])

n_scr_ax1.title.set_text(labels[0])
h_scr_ax2.title.set_text(labels[1])
p_scr_ax3.title.set_text(labels[2])
d_scr_ax4.title.set_text(labels[3])
plt.show()
# One line per platform on a shared axis: platform column value against
# 'Screentime Group', drawn in the order Netflix, Hulu, Prime Video, Disney+.
plt.figure(figsize = (20, 5))
for platform_column, line_color in (
    ('Netflix', 'red'),
    ('Hulu', 'lightgreen'),
    ('Prime Video', 'lightblue'),
    ('Disney+', 'darkblue'),
):
    sns.lineplot(
        x = screentimes_group_data_movies['Screentime Group'],
        y = screentimes_group_data_movies[platform_column],
        color = line_color,
    )
plt.xlabel('Screentime Group', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.show()
# Number of distinct screentime groups in the combined table and in each
# per-platform table (those tables only keep groups with a non-zero total).
screentime_group_total_all = len(screentimes_group_data_movies['Screentime Group'].unique())
screentime_group_total_netflix = len(netflix_screentimes_group_movies['Screentime Group'].unique())
screentime_group_total_hulu = len(hulu_screentimes_group_movies['Screentime Group'].unique())
screentime_group_total_prime_video = len(prime_video_screentimes_group_movies['Screentime Group'].unique())
screentime_group_total_disney = len(disney_screentimes_group_movies['Screentime Group'].unique())
print(f'''
Accross All Platforms Total Count of Screentime Group is '{screentime_group_total_all}'\n
Total Count of Screentime Group on 'Netflix' is '{screentime_group_total_netflix}'\n
Total Count of Screentime Group on 'Hulu' is '{screentime_group_total_hulu}'\n
Total Count of Screentime Group on 'Prime Video' is '{screentime_group_total_prime_video}'\n
Total Count of Screentime Group on 'Disney+' is '{screentime_group_total_disney}'\n
''')
Accross All Platforms Total Count of Screentime Group is '8'
Total Count of Screentime Group on 'Netflix' is '5'
Total Count of Screentime Group on 'Hulu' is '5'
Total Count of Screentime Group on 'Prime Video' is '8'
Total Count of Screentime Group on 'Disney+' is '4'
# 2x2 grid of line charts, one per platform: the platform column is on the
# x-axis and 'Screentime Group' on the y-axis. Panels are filled row by row.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
n_scr_ax1, h_scr_ax2, p_scr_ax3, d_scr_ax4 = [
    sns.lineplot(
        y = screentimes_group_data_movies['Screentime Group'],
        x = screentimes_group_data_movies[platform_column],
        color = line_color,
        ax = panel,
    )
    for platform_column, line_color, panel in zip(
        labels,
        ['red', 'lightgreen', 'lightblue', 'darkblue'],
        [axes[0, 0], axes[0, 1], axes[1, 0], axes[1, 1]],
    )
]
# Title every panel with its platform name
n_scr_ax1.title.set_text(labels[0])
h_scr_ax2.title.set_text(labels[1])
p_scr_ax3.title.set_text(labels[2])
d_scr_ax4.title.set_text(labels[3])
plt.show()
# 2x2 grid of bar charts built from the first 10 rows of the combined group
# table: each panel plots one platform column against 'Screentime Group'.
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

n_ru_ax1 = sns.barplot(
    x = screentimes_group_data_movies['Screentime Group'].head(10),
    y = screentimes_group_data_movies['Netflix'].head(10),
    palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(
    x = screentimes_group_data_movies['Screentime Group'].head(10),
    y = screentimes_group_data_movies['Hulu'].head(10),
    palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(
    x = screentimes_group_data_movies['Screentime Group'].head(10),
    y = screentimes_group_data_movies['Prime Video'].head(10),
    palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(
    x = screentimes_group_data_movies['Screentime Group'].head(10),
    y = screentimes_group_data_movies['Disney+'].head(10),
    palette = 'BuPu_r', ax = axes[1, 1])

n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])
plt.show()